summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/8021q/vlan_dev.c9
-rw-r--r--net/8021q/vlanproc.c3
-rw-r--r--net/8021q/vlanproc.h4
-rw-r--r--net/9p/trans_fd.c88
-rw-r--r--net/9p/trans_virtio.c2
-rw-r--r--net/Kconfig11
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c113
-rw-r--r--net/batman-adv/bitarray.c14
-rw-r--r--net/batman-adv/bitarray.h14
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c329
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c8
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c96
-rw-r--r--net/batman-adv/distributed-arp-table.h2
-rw-r--r--net/batman-adv/fragmentation.c26
-rw-r--r--net/batman-adv/fragmentation.h4
-rw-r--r--net/batman-adv/gateway_client.c132
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c4
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c79
-rw-r--r--net/batman-adv/hard-interface.h30
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h24
-rw-r--r--net/batman-adv/icmp_socket.c8
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/main.c105
-rw-r--r--net/batman-adv/main.h27
-rw-r--r--net/batman-adv/multicast.c46
-rw-r--r--net/batman-adv/multicast.h4
-rw-r--r--net/batman-adv/network-coding.c149
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c348
-rw-r--r--net/batman-adv/originator.h19
-rw-r--r--net/batman-adv/packet.h19
-rw-r--r--net/batman-adv/routing.c112
-rw-r--r--net/batman-adv/routing.h5
-rw-r--r--net/batman-adv/send.c43
-rw-r--r--net/batman-adv/send.h6
-rw-r--r--net/batman-adv/soft-interface.c73
-rw-r--r--net/batman-adv/soft-interface.h4
-rw-r--r--net/batman-adv/sysfs.c32
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/translation-table.c353
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/types.h67
-rw-r--r--net/bluetooth/6lowpan.c7
-rw-r--r--net/bluetooth/hci_core.c6
-rw-r--r--net/bluetooth/hci_request.c28
-rw-r--r--net/bluetooth/l2cap_core.c14
-rw-r--r--net/bluetooth/l2cap_sock.c12
-rw-r--r--net/bluetooth/smp.c16
-rw-r--r--net/bridge/br.c3
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_if.c43
-rw-r--r--net/bridge/br_mdb.c128
-rw-r--r--net/bridge/br_multicast.c101
-rw-r--r--net/bridge/br_netlink.c1
-rw-r--r--net/bridge/br_private.h12
-rw-r--r--net/bridge/br_stp.c14
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/bridge/br_stp_timer.c1
-rw-r--r--net/bridge/br_vlan.c11
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c8
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/ceph/auth_x.c49
-rw-r--r--net/ceph/auth_x.h2
-rw-r--r--net/ceph/crush/mapper.c33
-rw-r--r--net/ceph/messenger.c105
-rw-r--r--net/ceph/mon_client.c4
-rw-r--r--net/ceph/osd_client.c10
-rw-r--r--net/ceph/osdmap.c19
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c54
-rw-r--r--net/core/devlink.c738
-rw-r--r--net/core/dst.c10
-rw-r--r--net/core/dst_cache.c168
-rw-r--r--net/core/ethtool.c638
-rw-r--r--net/core/filter.c105
-rw-r--r--net/core/flow_dissector.c61
-rw-r--r--net/core/lwtunnel.c37
-rw-r--r--net/core/net-sysfs.c18
-rw-r--r--net/core/netclassid_cgroup.c1
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/rtnetlink.c70
-rw-r--r--net/core/scm.c7
-rw-r--r--net/core/skbuff.c127
-rw-r--r--net/core/sock.c83
-rw-r--r--net/core/sock_reuseport.c9
-rw-r--r--net/core/sysctl_net_core.c10
-rw-r--r--net/dccp/ipv4.c16
-rw-r--r--net/dccp/ipv6.c18
-rw-r--r--net/dsa/slave.c140
-rw-r--r--net/ethernet/eth.c3
-rw-r--r--net/ieee802154/socket.c17
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c19
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/devinet.c28
-rw-r--r--net/ipv4/fib_trie.c11
-rw-r--r--net/ipv4/fou.c14
-rw-r--r--net/ipv4/gre_offload.c100
-rw-r--r--net/ipv4/igmp.c78
-rw-r--r--net/ipv4/inet_connection_sock.c268
-rw-r--r--net/ipv4/inet_diag.c23
-rw-r--r--net/ipv4/inet_hashtables.c237
-rw-r--r--net/ipv4/inet_lro.c374
-rw-r--r--net/ipv4/ip_fragment.c30
-rw-r--r--net/ipv4/ip_gre.c45
-rw-r--r--net/ipv4/ip_input.c33
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/ip_tunnel.c98
-rw-r--r--net/ipv4/ip_tunnel_core.c30
-rw-r--r--net/ipv4/ipconfig.c4
-rw-r--r--net/ipv4/ipip.c4
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c3
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c2
-rw-r--r--net/ipv4/ping.c8
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c8
-rw-r--r--net/ipv4/route.c77
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c237
-rw-r--r--net/ipv4/tcp.c47
-rw-r--r--net/ipv4/tcp_fastopen.c70
-rw-r--r--net/ipv4/tcp_input.c193
-rw-r--r--net/ipv4/tcp_ipv4.c75
-rw-r--r--net/ipv4/tcp_memcontrol.c233
-rw-r--r--net/ipv4/tcp_metrics.c3
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_offload.c8
-rw-r--r--net/ipv4/tcp_output.c13
-rw-r--r--net/ipv4/tcp_timer.c23
-rw-r--r--net/ipv4/udp.c68
-rw-r--r--net/ipv4/udp_offload.c102
-rw-r--r--net/ipv6/Kconfig2
-rw-r--r--net/ipv6/addrconf.c159
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/datagram.c3
-rw-r--r--net/ipv6/ila/ila_common.c1
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/inet6_hashtables.c78
-rw-r--r--net/ipv6/ip6_checksum.c23
-rw-r--r--net/ipv6/ip6_flowlabel.c5
-rw-r--r--net/ipv6/ip6_gre.c13
-rw-r--r--net/ipv6/ip6_input.c12
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/ip6_tunnel.c103
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ndisc.c9
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c74
-rw-r--r--net/ipv6/reassembly.c6
-rw-r--r--net/ipv6/route.c7
-rw-r--r--net/ipv6/sit.c27
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c33
-rw-r--r--net/ipv6/udp.c96
-rw-r--r--net/ipv6/udp_offload.c8
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c2
-rw-r--r--net/irda/ircomm/ircomm_param.c3
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/l2tp/l2tp_ip6.c3
-rw-r--r--net/l2tp/l2tp_netlink.c18
-rw-r--r--net/l3mdev/l3mdev.c11
-rw-r--r--net/llc/af_llc.c4
-rw-r--r--net/mac80211/debugfs.c7
-rw-r--r--net/mac80211/ibss.c1
-rw-r--r--net/mac80211/main.c6
-rw-r--r--net/mac80211/mesh.c11
-rw-r--r--net/mac80211/mesh.h4
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/offchannel.c16
-rw-r--r--net/mac80211/scan.c20
-rw-r--r--net/mac80211/sta_info.c2
-rw-r--r--net/mac80211/status.c5
-rw-r--r--net/mac80211/util.c16
-rw-r--r--net/mpls/mpls_iptunnel.c1
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c43
-rw-r--r--net/netfilter/nf_conntrack_helper.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c2
-rw-r--r--net/netfilter/nf_tables_netdev.c8
-rw-r--r--net/netfilter/nfnetlink.c23
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c6
-rw-r--r--net/netfilter/nft_byteorder.c6
-rw-r--r--net/netfilter/nft_counter.c4
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/xt_TCPMSS.c9
-rw-r--r--net/netfilter/xt_TEE.c4
-rw-r--r--net/netfilter/xt_TPROXY.c31
-rw-r--r--net/netfilter/xt_socket.c28
-rw-r--r--net/netlink/Kconfig9
-rw-r--r--net/netlink/af_netlink.c771
-rw-r--r--net/netlink/af_netlink.h15
-rw-r--r--net/netlink/diag.c39
-rw-r--r--net/netlink/genetlink.c27
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/actions.c27
-rw-r--r--net/openvswitch/datapath.c85
-rw-r--r--net/openvswitch/datapath.h4
-rw-r--r--net/openvswitch/flow_netlink.c6
-rw-r--r--net/openvswitch/vport-internal_dev.c10
-rw-r--r--net/openvswitch/vport-netdev.c2
-rw-r--r--net/openvswitch/vport-vxlan.c4
-rw-r--r--net/openvswitch/vport.h7
-rw-r--r--net/packet/af_packet.c451
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/rds/ib.c34
-rw-r--r--net/rds/iw.c23
-rw-r--r--net/rds/tcp.c1
-rw-r--r--net/rfkill/core.c16
-rw-r--r--net/sched/act_api.c137
-rw-r--r--net/sched/act_bpf.c52
-rw-r--r--net/sched/act_connmark.c54
-rw-r--r--net/sched/act_csum.c67
-rw-r--r--net/sched/act_gact.c55
-rw-r--r--net/sched/act_ipt.c127
-rw-r--r--net/sched/act_mirred.c54
-rw-r--r--net/sched/act_nat.c72
-rw-r--r--net/sched/act_pedit.c54
-rw-r--r--net/sched/act_police.c52
-rw-r--r--net/sched/act_simple.c55
-rw-r--r--net/sched/act_skbedit.c54
-rw-r--r--net/sched/act_vlan.c54
-rw-r--r--net/sched/cls_u32.c118
-rw-r--r--net/sched/sch_api.c11
-rw-r--r--net/sched/sch_cbq.c12
-rw-r--r--net/sched/sch_choke.c6
-rw-r--r--net/sched/sch_codel.c10
-rw-r--r--net/sched/sch_drr.c11
-rw-r--r--net/sched/sch_dsmark.c11
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_fq_codel.c17
-rw-r--r--net/sched/sch_hfsc.c9
-rw-r--r--net/sched/sch_hhf.c10
-rw-r--r--net/sched/sch_htb.c24
-rw-r--r--net/sched/sch_mqprio.c8
-rw-r--r--net/sched/sch_multiq.c16
-rw-r--r--net/sched/sch_netem.c13
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_prio.c15
-rw-r--r--net/sched/sch_qfq.c9
-rw-r--r--net/sched/sch_red.c10
-rw-r--r--net/sched/sch_sfb.c10
-rw-r--r--net/sched/sch_sfq.c16
-rw-r--r--net/sched/sch_tbf.c15
-rw-r--r--net/sctp/chunk.c13
-rw-r--r--net/sctp/endpointola.c2
-rw-r--r--net/sctp/input.c57
-rw-r--r--net/sctp/proc.c20
-rw-r--r--net/sctp/protocol.c46
-rw-r--r--net/sctp/sm_sideeffect.c12
-rw-r--r--net/sctp/socket.c14
-rw-r--r--net/sctp/transport.c8
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c3
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/clnt.c1
-rw-r--r--net/sunrpc/rpc_pipe.c62
-rw-r--r--net/sunrpc/svc_xprt.c45
-rw-r--r--net/sunrpc/svcauth.c2
-rw-r--r--net/sunrpc/svcauth_unix.c8
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/sunrpc/xprtrdma/Makefile2
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c26
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c64
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c181
-rw-r--r--net/sunrpc/xprtrdma/physical_ops.c13
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c16
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c41
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c371
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c56
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c33
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c360
-rw-r--r--net/sunrpc/xprtrdma/transport.c33
-rw-r--r--net/sunrpc/xprtrdma/verbs.c40
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h35
-rw-r--r--net/sunrpc/xprtsock.c63
-rw-r--r--net/switchdev/switchdev.c15
-rw-r--r--net/tipc/link.c49
-rw-r--r--net/tipc/link.h6
-rw-r--r--net/tipc/name_table.c14
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/tipc/node.c117
-rw-r--r--net/tipc/server.c4
-rw-r--r--net/tipc/subscr.c131
-rw-r--r--net/tipc/subscr.h11
-rw-r--r--net/unix/af_unix.c36
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c8
-rw-r--r--net/vmw_vsock/af_vsock.c19
-rw-r--r--net/wireless/reg.c45
-rw-r--r--net/xfrm/xfrm_output.c2
305 files changed, 8474 insertions, 5433 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index d2cd9de4b724..69929c05c843 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -261,7 +261,6 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
* hope the underlying device can handle it.
*/
new_dev->mtu = real_dev->mtu;
- new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
vlan = vlan_dev_priv(new_dev);
vlan->vlan_proto = htons(ETH_P_8021Q);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ad5e2fd1012c..e416a4038a12 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -621,12 +621,12 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
return features;
}
-static int vlan_ethtool_get_settings(struct net_device *dev,
- struct ethtool_cmd *cmd)
+static int vlan_ethtool_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
- return __ethtool_get_settings(vlan->real_dev, cmd);
+ return __ethtool_get_link_ksettings(vlan->real_dev, cmd);
}
static void vlan_ethtool_get_drvinfo(struct net_device *dev,
@@ -741,7 +741,7 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
}
static const struct ethtool_ops vlan_ethtool_ops = {
- .get_settings = vlan_ethtool_get_settings,
+ .get_link_ksettings = vlan_ethtool_get_link_ksettings,
.get_drvinfo = vlan_ethtool_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ts_info = vlan_ethtool_get_ts_info,
@@ -799,6 +799,7 @@ void vlan_setup(struct net_device *dev)
ether_setup(dev);
dev->priv_flags |= IFF_802_1Q_VLAN | IFF_NO_QUEUE;
+ dev->priv_flags |= IFF_UNICAST_FLT;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
netif_keep_dst(dev);
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index ae63cf72a953..5f1446c9f098 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -184,12 +184,11 @@ int vlan_proc_add_dev(struct net_device *vlandev)
/*
* Delete directory entry for VLAN device.
*/
-int vlan_proc_rem_dev(struct net_device *vlandev)
+void vlan_proc_rem_dev(struct net_device *vlandev)
{
/** NOTE: This will consume the memory pointed to by dent, it seems. */
proc_remove(vlan_dev_priv(vlandev)->dent);
vlan_dev_priv(vlandev)->dent = NULL;
- return 0;
}
/****** Proc filesystem entry points ****************************************/
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index 063f60a3d5cc..8838a2e92eb6 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -5,7 +5,7 @@
struct net;
int vlan_proc_init(struct net *net);
-int vlan_proc_rem_dev(struct net_device *vlandev);
+void vlan_proc_rem_dev(struct net_device *vlandev);
int vlan_proc_add_dev(struct net_device *vlandev);
void vlan_proc_cleanup(struct net *net);
@@ -14,7 +14,7 @@ void vlan_proc_cleanup(struct net *net);
#define vlan_proc_init(net) (0)
#define vlan_proc_cleanup(net) do {} while (0)
#define vlan_proc_add_dev(dev) ({(void)(dev), 0; })
-#define vlan_proc_rem_dev(dev) ({(void)(dev), 0; })
+#define vlan_proc_rem_dev(dev) do {} while (0)
#endif
#endif /* !(__BEN_VLAN_PROC_INC__) */
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index bced8c074c12..7bc2208b6cc4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -108,9 +108,7 @@ struct p9_poll_wait {
* @unsent_req_list: accounting for requests that haven't been sent
* @req: current request being processed (if any)
* @tmp_buf: temporary buffer to read in header
- * @rsize: amount to read for current frame
- * @rpos: read position in current frame
- * @rbuf: current read buffer
+ * @rc: temporary fcall for reading current frame
* @wpos: write position for current frame
* @wsize: amount of data to write for current frame
* @wbuf: current write buffer
@@ -131,9 +129,7 @@ struct p9_conn {
struct list_head unsent_req_list;
struct p9_req_t *req;
char tmp_buf[7];
- int rsize;
- int rpos;
- char *rbuf;
+ struct p9_fcall rc;
int wpos;
int wsize;
char *wbuf;
@@ -305,69 +301,77 @@ static void p9_read_work(struct work_struct *work)
if (m->err < 0)
return;
- p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
+ p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
- if (!m->rbuf) {
- m->rbuf = m->tmp_buf;
- m->rpos = 0;
- m->rsize = 7; /* start by reading header */
+ if (!m->rc.sdata) {
+ m->rc.sdata = m->tmp_buf;
+ m->rc.offset = 0;
+ m->rc.capacity = 7; /* start by reading header */
}
clear_bit(Rpending, &m->wsched);
- p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n",
- m, m->rpos, m->rsize, m->rsize-m->rpos);
- err = p9_fd_read(m->client, m->rbuf + m->rpos,
- m->rsize - m->rpos);
+ p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
+ m, m->rc.offset, m->rc.capacity,
+ m->rc.capacity - m->rc.offset);
+ err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
+ m->rc.capacity - m->rc.offset);
p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
- if (err == -EAGAIN) {
+ if (err == -EAGAIN)
goto end_clear;
- }
if (err <= 0)
goto error;
- m->rpos += err;
+ m->rc.offset += err;
- if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
- u16 tag;
+ /* header read in */
+ if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new header\n");
- n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
- if (n >= m->client->msize) {
+ err = p9_parse_header(&m->rc, NULL, NULL, NULL, 0);
+ if (err) {
+ p9_debug(P9_DEBUG_ERROR,
+ "error parsing header: %d\n", err);
+ goto error;
+ }
+
+ if (m->rc.size >= m->client->msize) {
p9_debug(P9_DEBUG_ERROR,
- "requested packet size too big: %d\n", n);
+ "requested packet size too big: %d\n",
+ m->rc.size);
err = -EIO;
goto error;
}
- tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
p9_debug(P9_DEBUG_TRANS,
- "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
+ "mux %p pkt: size: %d bytes tag: %d\n",
+ m, m->rc.size, m->rc.tag);
- m->req = p9_tag_lookup(m->client, tag);
+ m->req = p9_tag_lookup(m->client, m->rc.tag);
if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
- tag);
+ m->rc.tag);
err = -EIO;
goto error;
}
if (m->req->rc == NULL) {
- m->req->rc = kmalloc(sizeof(struct p9_fcall) +
- m->client->msize, GFP_NOFS);
- if (!m->req->rc) {
- m->req = NULL;
- err = -ENOMEM;
- goto error;
- }
+ p9_debug(P9_DEBUG_ERROR,
+ "No recv fcall for tag %d (req %p), disconnecting!\n",
+ m->rc.tag, m->req);
+ m->req = NULL;
+ err = -EIO;
+ goto error;
}
- m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
- memcpy(m->rbuf, m->tmp_buf, m->rsize);
- m->rsize = n;
+ m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+ memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
+ m->rc.capacity = m->rc.size;
}
- /* not an else because some packets (like clunk) have no payload */
- if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+ /* packet is read in
+ * not an else because some packets (like clunk) have no payload
+ */
+ if ((m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
spin_lock(&m->client->lock);
if (m->req->status != REQ_STATUS_ERROR)
@@ -375,9 +379,9 @@ static void p9_read_work(struct work_struct *work)
list_del(&m->req->req_list);
spin_unlock(&m->client->lock);
p9_client_cb(m->client, m->req, status);
- m->rbuf = NULL;
- m->rpos = 0;
- m->rsize = 0;
+ m->rc.sdata = NULL;
+ m->rc.offset = 0;
+ m->rc.capacity = 0;
m->req = NULL;
}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 199bc76202d2..4acb1d5417aa 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -658,7 +658,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
mutex_unlock(&virtio_9p_lock);
if (!found) {
- pr_err("no channels available\n");
+ pr_err("no channels available for device %s\n", devname);
return ret;
}
diff --git a/net/Kconfig b/net/Kconfig
index 174354618f8a..6c9cfb0d7639 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -392,6 +392,17 @@ config LWTUNNEL
weight tunnel endpoint. Tunnel encapsulation parameters are stored
with light weight tunnel state associated with fib routes.
+config DST_CACHE
+ bool "dst cache"
+ default n
+
+config NET_DEVLINK
+ tristate "Network physical/parent device Netlink interface"
+ help
+ Network physical/parent device Netlink interface provides
+ infrastructure to support access to physical chip-wide config and
+ monitoring.
+
endif # if NET
# Used by archs to tell that they support BPF_JIT
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index d5871ac493eb..f066781be3c8 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1625,7 +1625,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
rt = atrtr_find(&at_hint);
}
- err = ENETUNREACH;
+ err = -ENETUNREACH;
if (!rt)
goto out;
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c6fc8f756c9a..2dd40e5ea030 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -12,7 +12,7 @@ config BATMAN_ADV
B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
a routing protocol for multi-hop ad-hoc mesh networks. The
networks may be wired or wireless. See
- http://www.open-mesh.org/ for more information and user space
+ https://www.open-mesh.org/ for more information and user space
tools.
config BATMAN_ADV_BLA
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 21434ab79d2c..207e2af316c7 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 4e59cf3eb079..a7485d676088 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index df625de55ef2..5651e33ca6bd 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -31,6 +31,7 @@
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/list.h>
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
#include <linux/printk.h>
@@ -88,7 +89,7 @@ static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value)
* in the given ring buffer
* @lq_recv: pointer to the ring buffer
*
- * Returns computed average value.
+ * Return: computed average value.
*/
static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
{
@@ -132,7 +133,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
* @orig_node: the orig_node that has to be changed
* @max_if_num: the current amount of interfaces
*
- * Returns 0 on success, a negative error code otherwise.
+ * Return: 0 on success, a negative error code otherwise.
*/
static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
int max_if_num)
@@ -180,7 +181,7 @@ unlock:
* @max_if_num: the current amount of interfaces
* @del_if_num: the index of the interface being removed
*
- * Returns 0 on success, a negative error code otherwise.
+ * Return: 0 on success, a negative error code otherwise.
*/
static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
int max_if_num, int del_if_num)
@@ -246,7 +247,7 @@ unlock:
* @bat_priv: the bat priv with all the soft interface information
* @addr: mac address of the originator
*
- * Returns the originator object corresponding to the passed mac address or NULL
+ * Return: the originator object corresponding to the passed mac address or NULL
* on failure.
* If the object does not exists it is created an initialised.
*/
@@ -286,8 +287,8 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
free_orig_node:
/* free twice, as batadv_orig_node_new sets refcount to 2 */
- batadv_orig_node_free_ref(orig_node);
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return NULL;
}
@@ -396,7 +397,14 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv)
return new_tq;
}
-/* is there another aggregated packet here? */
+/**
+ * batadv_iv_ogm_aggr_packet - checks if there is another OGM attached
+ * @buff_pos: current position in the skb
+ * @packet_len: total length of the skb
+ * @tvlv_len: tvlv length of the previously considered OGM
+ *
+ * Return: true if there is enough space for another OGM, false otherwise.
+ */
static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
__be16 tvlv_len)
{
@@ -507,7 +515,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
@@ -522,7 +530,7 @@ out:
* @if_outgoing: interface for which the retransmission should be considered
* @forw_packet: the forwarded packet which should be checked
*
- * Returns true if new_packet can be aggregated with forw_packet
+ * Return: true if new_packet can be aggregated with forw_packet
*/
static bool
batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
@@ -609,7 +617,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return res;
}
@@ -636,10 +644,10 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
unsigned char *skb_buff;
unsigned int skb_size;
- if (!atomic_inc_not_zero(&if_incoming->refcount))
+ if (!kref_get_unless_zero(&if_incoming->refcount))
return;
- if (!atomic_inc_not_zero(&if_outgoing->refcount))
+ if (!kref_get_unless_zero(&if_outgoing->refcount))
goto out_free_incoming;
/* own packet should always be scheduled */
@@ -703,9 +711,9 @@ out_nomem:
if (!own_packet)
atomic_inc(&bat_priv->batman_queue_left);
out_free_outgoing:
- batadv_hardif_free_ref(if_outgoing);
+ batadv_hardif_put(if_outgoing);
out_free_incoming:
- batadv_hardif_free_ref(if_incoming);
+ batadv_hardif_put(if_incoming);
}
/* aggregate a new packet into the existing ogm packet */
@@ -950,7 +958,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
@@ -995,9 +1003,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
neigh_addr = tmp_neigh_node->addr;
if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
tmp_neigh_node->if_incoming == if_incoming &&
- atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
+ kref_get_unless_zero(&tmp_neigh_node->refcount)) {
if (WARN(neigh_node, "too many matching neigh_nodes"))
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
neigh_node = tmp_neigh_node;
continue;
}
@@ -1018,7 +1026,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
neigh_ifinfo->bat_iv.tq_avg = tq_avg;
spin_unlock_bh(&tmp_neigh_node->ifinfo_lock);
- batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
neigh_ifinfo = NULL;
}
@@ -1033,7 +1041,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
ethhdr->h_source,
orig_node, orig_tmp);
- batadv_orig_node_free_ref(orig_tmp);
+ batadv_orig_node_put(orig_tmp);
if (!neigh_node)
goto unlock;
} else {
@@ -1108,13 +1116,13 @@ unlock:
rcu_read_unlock();
out:
if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
if (router)
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
if (neigh_ifinfo)
- batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
if (router_ifinfo)
- batadv_neigh_ifinfo_free_ref(router_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
/**
@@ -1125,7 +1133,7 @@ out:
* @if_incoming: interface where the packet was received
* @if_outgoing: interface for which the retransmission should be considered
*
- * Returns 1 if the link can be considered bidirectional, 0 otherwise
+ * Return: 1 if the link can be considered bidirectional, 0 otherwise
*/
static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
struct batadv_orig_node *orig_neigh_node,
@@ -1154,7 +1162,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
if (tmp_neigh_node->if_incoming != if_incoming)
continue;
- if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+ if (!kref_get_unless_zero(&tmp_neigh_node->refcount))
continue;
neigh_node = tmp_neigh_node;
@@ -1184,7 +1192,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
if (neigh_ifinfo) {
neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count;
- batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
} else {
neigh_rq_count = 0;
}
@@ -1257,7 +1265,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
out:
if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
return ret;
}
@@ -1269,7 +1277,7 @@ out:
* @if_incoming: interface on which the OGM packet was received
* @if_outgoing: interface for which the retransmission should be considered
*
- * Returns duplicate status as enum batadv_dup_status
+ * Return: duplicate status as enum batadv_dup_status
*/
static enum batadv_dup_status
batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
@@ -1298,7 +1306,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
if (WARN_ON(!orig_ifinfo)) {
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return 0;
}
@@ -1308,7 +1316,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
/* signalize caller that the packet is to be dropped. */
if (!hlist_empty(&orig_node->neigh_list) &&
batadv_window_protected(bat_priv, seq_diff,
- &orig_ifinfo->batman_seqno_reset)) {
+ BATADV_TQ_LOCAL_WINDOW_SIZE,
+ &orig_ifinfo->batman_seqno_reset, NULL)) {
ret = BATADV_PROTECTED;
goto out;
}
@@ -1344,7 +1353,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
packet_count = bitmap_weight(bitmap,
BATADV_TQ_LOCAL_WINDOW_SIZE);
neigh_ifinfo->bat_iv.real_packet_count = packet_count;
- batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
}
rcu_read_unlock();
@@ -1358,8 +1367,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
out:
spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
- batadv_orig_node_free_ref(orig_node);
- batadv_orig_ifinfo_free_ref(orig_ifinfo);
+ batadv_orig_node_put(orig_node);
+ batadv_orig_ifinfo_put(orig_ifinfo);
return ret;
}
@@ -1505,7 +1514,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
ogm_packet, if_incoming,
if_outgoing, dup_status);
}
- batadv_orig_ifinfo_free_ref(orig_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
/* only forward for specific interface, not for the default one. */
if (if_outgoing == BATADV_IF_DEFAULT)
@@ -1554,18 +1563,18 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
out_neigh:
if ((orig_neigh_node) && (!is_single_hop_neigh))
- batadv_orig_node_free_ref(orig_neigh_node);
+ batadv_orig_node_put(orig_neigh_node);
out:
if (router_ifinfo)
- batadv_neigh_ifinfo_free_ref(router_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
if (router)
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
if (router_router)
- batadv_neigh_node_free_ref(router_router);
+ batadv_neigh_node_put(router_router);
if (orig_neigh_router)
- batadv_neigh_node_free_ref(orig_neigh_router);
+ batadv_neigh_node_put(orig_neigh_router);
if (hardif_neigh)
- batadv_hardif_neigh_free_ref(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
kfree_skb(skb_priv);
}
@@ -1688,7 +1697,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: originator packet from myself (via neighbor)\n");
- batadv_orig_node_free_ref(orig_neigh_node);
+ batadv_orig_node_put(orig_neigh_node);
return;
}
@@ -1726,7 +1735,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
}
rcu_read_unlock();
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
}
static int batadv_iv_ogm_receive(struct sk_buff *skb,
@@ -1796,7 +1805,7 @@ batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node,
neigh_node->addr,
n_ifinfo->bat_iv.tq_avg);
- batadv_neigh_ifinfo_free_ref(n_ifinfo);
+ batadv_neigh_ifinfo_put(n_ifinfo);
}
}
@@ -1859,9 +1868,9 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
batman_count++;
next:
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
if (n_ifinfo)
- batadv_neigh_ifinfo_free_ref(n_ifinfo);
+ batadv_neigh_ifinfo_put(n_ifinfo);
}
rcu_read_unlock();
}
@@ -1929,7 +1938,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
* @neigh2: the second neighbor object of the comparison
* @if_outgoing2: outgoing interface for the second neighbor
*
- * Returns a value less, equal to or greater than 0 if the metric via neigh1 is
+ * Return: a value less, equal to or greater than 0 if the metric via neigh1 is
* lower, the same as or higher than the metric via neigh2
*/
static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
@@ -1955,9 +1964,9 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
out:
if (neigh1_ifinfo)
- batadv_neigh_ifinfo_free_ref(neigh1_ifinfo);
+ batadv_neigh_ifinfo_put(neigh1_ifinfo);
if (neigh2_ifinfo)
- batadv_neigh_ifinfo_free_ref(neigh2_ifinfo);
+ batadv_neigh_ifinfo_put(neigh2_ifinfo);
return diff;
}
@@ -1970,7 +1979,7 @@ out:
* @neigh2: the second neighbor object of the comparison
* @if_outgoing2: outgoing interface for the second neighbor
*
- * Returns true if the metric via neigh1 is equally good or better than
+ * Return: true if the metric via neigh1 is equally good or better than
* the metric via neigh2, false otherwise.
*/
static bool
@@ -1998,9 +2007,9 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1,
out:
if (neigh1_ifinfo)
- batadv_neigh_ifinfo_free_ref(neigh1_ifinfo);
+ batadv_neigh_ifinfo_put(neigh1_ifinfo);
if (neigh2_ifinfo)
- batadv_neigh_ifinfo_free_ref(neigh2_ifinfo);
+ batadv_neigh_ifinfo_put(neigh2_ifinfo);
return ret;
}
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 25cbc36e997a..b56bb000a0ab 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
@@ -29,10 +29,16 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE);
}
-/* receive and process one packet within the sequence number window.
+/**
+ * batadv_bit_get_packet - receive and process one packet within the sequence
+ * number window
+ * @priv: the bat priv with all the soft interface information
+ * @seq_bits: pointer to the sequence number receive packet
+ * @seq_num_diff: difference between the current/received sequence number and
+ * the last sequence number
+ * @set_mark: whether this packet should be marked in seq_bits
*
- * returns:
- * 1 if the window was moved (either new or very old)
+ * Return: 1 if the window was moved (either new or very old),
* 0 if the window was not moved/shifted.
*/
int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 0226b220fe5b..3e41bb80eb81 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
@@ -24,7 +24,14 @@
#include <linux/compiler.h>
#include <linux/types.h>
-/* Returns 1 if the corresponding bit in the given seq_bits indicates true
+/**
+ * batadv_test_bit - check if bit is set in the current window
+ *
+ * @seq_bits: pointer to the sequence number receive packet
+ * @last_seqno: latest sequence number in seq_bits
+ * @curr_seqno: sequence number to test for
+ *
+ * Return: 1 if the corresponding bit in the given seq_bits indicates true
* and curr_seqno is within range of last_seqno. Otherwise returns 0.
*/
static inline int batadv_test_bit(const unsigned long *seq_bits,
@@ -48,9 +55,6 @@ static inline void batadv_set_bit(unsigned long *seq_bits, s32 n)
set_bit(n, seq_bits); /* turn the position on */
}
-/* receive and process one packet, returns 1 if received seq_num is considered
- * new, 0 if old
- */
int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
int set_mark);
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index d5d71ac96c8a..0a6c8b824a00 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
@@ -31,6 +31,7 @@
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -58,7 +59,13 @@ static void
batadv_bla_send_announce(struct batadv_priv *bat_priv,
struct batadv_bla_backbone_gw *backbone_gw);
-/* return the index of the claim */
+/**
+ * batadv_choose_claim - choose the right bucket for a claim.
+ * @data: data to hash
+ * @size: size of the hash table
+ *
+ * Return: the hash index of the claim
+ */
static inline u32 batadv_choose_claim(const void *data, u32 size)
{
struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
@@ -70,7 +77,13 @@ static inline u32 batadv_choose_claim(const void *data, u32 size)
return hash % size;
}
-/* return the index of the backbone gateway */
+/**
+ * batadv_choose_backbone_gw - choose the right bucket for a backbone gateway.
+ * @data: data to hash
+ * @size: size of the hash table
+ *
+ * Return: the hash index of the backbone gateway
+ */
static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
{
const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
@@ -82,7 +95,13 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
return hash % size;
}
-/* compares address and vid of two backbone gws */
+/**
+ * batadv_compare_backbone_gw - compare address and vid of two backbone gws
+ * @node: list node of the first entry to compare
+ * @data2: pointer to the second backbone gateway
+ *
+ * Return: 1 if the backbones have the same data, 0 otherwise
+ */
static int batadv_compare_backbone_gw(const struct hlist_node *node,
const void *data2)
{
@@ -100,7 +119,13 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
return 1;
}
-/* compares address and vid of two claims */
+/**
+ * batadv_compare_backbone_gw - compare address and vid of two claims
+ * @node: list node of the first entry to compare
+ * @data2: pointer to the second claims
+ *
+ * Return: 1 if the claim have the same data, 0 otherwise
+ */
static int batadv_compare_claim(const struct hlist_node *node,
const void *data2)
{
@@ -118,39 +143,62 @@ static int batadv_compare_claim(const struct hlist_node *node,
return 1;
}
-/* free a backbone gw */
-static void
-batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw)
+/**
+ * batadv_backbone_gw_release - release backbone gw from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the backbone gw
+ */
+static void batadv_backbone_gw_release(struct kref *ref)
+{
+ struct batadv_bla_backbone_gw *backbone_gw;
+
+ backbone_gw = container_of(ref, struct batadv_bla_backbone_gw,
+ refcount);
+
+ kfree_rcu(backbone_gw, rcu);
+}
+
+/**
+ * batadv_backbone_gw_put - decrement the backbone gw refcounter and possibly
+ * release it
+ * @backbone_gw: backbone gateway to be free'd
+ */
+static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
{
- if (atomic_dec_and_test(&backbone_gw->refcount))
- kfree_rcu(backbone_gw, rcu);
+ kref_put(&backbone_gw->refcount, batadv_backbone_gw_release);
}
-/* finally deinitialize the claim */
-static void batadv_claim_free_rcu(struct rcu_head *rcu)
+/**
+ * batadv_claim_release - release claim from lists and queue for free after rcu
+ * grace period
+ * @ref: kref pointer of the claim
+ */
+static void batadv_claim_release(struct kref *ref)
{
struct batadv_bla_claim *claim;
- claim = container_of(rcu, struct batadv_bla_claim, rcu);
+ claim = container_of(ref, struct batadv_bla_claim, refcount);
- batadv_backbone_gw_free_ref(claim->backbone_gw);
- kfree(claim);
+ batadv_backbone_gw_put(claim->backbone_gw);
+ kfree_rcu(claim, rcu);
}
-/* free a claim, call claim_free_rcu if its the last reference */
-static void batadv_claim_free_ref(struct batadv_bla_claim *claim)
+/**
+ * batadv_claim_put - decrement the claim refcounter and possibly
+ * release it
+ * @claim: claim to be free'd
+ */
+static void batadv_claim_put(struct batadv_bla_claim *claim)
{
- if (atomic_dec_and_test(&claim->refcount))
- call_rcu(&claim->rcu, batadv_claim_free_rcu);
+ kref_put(&claim->refcount, batadv_claim_release);
}
/**
- * batadv_claim_hash_find
+ * batadv_claim_hash_find - looks for a claim in the claim hash
* @bat_priv: the bat priv with all the soft interface information
* @data: search data (may be local/static data)
*
- * looks for a claim in the hash, and returns it if found
- * or NULL otherwise.
+ * Return: claim if found or NULL otherwise.
*/
static struct batadv_bla_claim
*batadv_claim_hash_find(struct batadv_priv *bat_priv,
@@ -173,7 +221,7 @@ static struct batadv_bla_claim
if (!batadv_compare_claim(&claim->hash_entry, data))
continue;
- if (!atomic_inc_not_zero(&claim->refcount))
+ if (!kref_get_unless_zero(&claim->refcount))
continue;
claim_tmp = claim;
@@ -185,12 +233,12 @@ static struct batadv_bla_claim
}
/**
- * batadv_backbone_hash_find - looks for a claim in the hash
+ * batadv_backbone_hash_find - looks for a backbone gateway in the hash
* @bat_priv: the bat priv with all the soft interface information
* @addr: the address of the originator
* @vid: the VLAN ID
*
- * Returns claim if found or NULL otherwise.
+ * Return: backbone gateway if found or NULL otherwise
*/
static struct batadv_bla_backbone_gw *
batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
@@ -217,7 +265,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
&search_entry))
continue;
- if (!atomic_inc_not_zero(&backbone_gw->refcount))
+ if (!kref_get_unless_zero(&backbone_gw->refcount))
continue;
backbone_gw_tmp = backbone_gw;
@@ -228,7 +276,10 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
return backbone_gw_tmp;
}
-/* delete all claims for a backbone */
+/**
+ * batadv_bla_del_backbone_claims - delete all claims for a backbone
+ * @backbone_gw: backbone gateway where the claims should be removed
+ */
static void
batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
{
@@ -253,7 +304,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
if (claim->backbone_gw != backbone_gw)
continue;
- batadv_claim_free_ref(claim);
+ batadv_claim_put(claim);
hlist_del_rcu(&claim->hash_entry);
}
spin_unlock_bh(list_lock);
@@ -372,18 +423,17 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
netif_rx(skb);
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
- * batadv_bla_get_backbone_gw
+ * batadv_bla_get_backbone_gw - finds or creates a backbone gateway
* @bat_priv: the bat priv with all the soft interface information
* @orig: the mac address of the originator
* @vid: the VLAN ID
* @own_backbone: set if the requested backbone is local
*
- * searches for the backbone gw or creates a new one if it could not
- * be found.
+ * Return: the (possibly created) backbone gateway or NULL on error
*/
static struct batadv_bla_backbone_gw *
batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
@@ -416,7 +466,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
ether_addr_copy(entry->orig, orig);
/* one for the hash, one for returning */
- atomic_set(&entry->refcount, 2);
+ kref_init(&entry->refcount);
+ kref_get(&entry->refcount);
hash_added = batadv_hash_add(bat_priv->bla.backbone_hash,
batadv_compare_backbone_gw,
@@ -434,7 +485,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
if (orig_node) {
batadv_tt_global_del_orig(bat_priv, orig_node, vid,
"became a backbone gateway");
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
}
if (own_backbone) {
@@ -449,7 +500,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
return entry;
}
-/* update or add the own backbone gw to make sure we announce
+/**
+ * batadv_bla_update_own_backbone_gw - updates the own backbone gw for a VLAN
+ * @bat_priv: the bat priv with all the soft interface information
+ * @primary_if: the selected primary interface
+ * @vid: VLAN identifier
+ *
+ * update or add the own backbone gw to make sure we announce
* where we receive other backbone gws
*/
static void
@@ -466,7 +523,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
return;
backbone_gw->lasttime = jiffies;
- batadv_backbone_gw_free_ref(backbone_gw);
+ batadv_backbone_gw_put(backbone_gw);
}
/**
@@ -515,7 +572,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
/* finally, send an announcement frame */
batadv_bla_send_announce(bat_priv, backbone_gw);
- batadv_backbone_gw_free_ref(backbone_gw);
+ batadv_backbone_gw_put(backbone_gw);
}
/**
@@ -546,12 +603,9 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
}
/**
- * batadv_bla_send_announce
+ * batadv_bla_send_announce - Send an announcement frame
* @bat_priv: the bat priv with all the soft interface information
* @backbone_gw: our backbone gateway which should be announced
- *
- * This function sends an announcement. It is called from multiple
- * places.
*/
static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
struct batadv_bla_backbone_gw *backbone_gw)
@@ -599,7 +653,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
claim->lasttime = jiffies;
claim->backbone_gw = backbone_gw;
- atomic_set(&claim->refcount, 2);
+ kref_init(&claim->refcount);
+ kref_get(&claim->refcount);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
mac, BATADV_PRINT_VID(vid));
@@ -626,10 +681,10 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
spin_lock_bh(&claim->backbone_gw->crc_lock);
claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
spin_unlock_bh(&claim->backbone_gw->crc_lock);
- batadv_backbone_gw_free_ref(claim->backbone_gw);
+ batadv_backbone_gw_put(claim->backbone_gw);
}
/* set (new) backbone gw */
- atomic_inc(&backbone_gw->refcount);
+ kref_get(&backbone_gw->refcount);
claim->backbone_gw = backbone_gw;
spin_lock_bh(&backbone_gw->crc_lock);
@@ -638,11 +693,14 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
backbone_gw->lasttime = jiffies;
claim_free_ref:
- batadv_claim_free_ref(claim);
+ batadv_claim_put(claim);
}
-/* Delete a claim from the claim hash which has the
- * given mac address and vid.
+/**
+ * batadv_bla_del_claim - delete a claim from the claim hash
+ * @bat_priv: the bat priv with all the soft interface information
+ * @mac: mac address of the claim to be removed
+ * @vid: VLAN id for the claim to be removed
*/
static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
const u8 *mac, const unsigned short vid)
@@ -660,17 +718,25 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
batadv_choose_claim, claim);
- batadv_claim_free_ref(claim); /* reference from the hash is gone */
+ batadv_claim_put(claim); /* reference from the hash is gone */
spin_lock_bh(&claim->backbone_gw->crc_lock);
claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
spin_unlock_bh(&claim->backbone_gw->crc_lock);
/* don't need the reference from hash_find() anymore */
- batadv_claim_free_ref(claim);
+ batadv_claim_put(claim);
}
-/* check for ANNOUNCE frame, return 1 if handled */
+/**
+ * batadv_handle_announce - check for ANNOUNCE frame
+ * @bat_priv: the bat priv with all the soft interface information
+ * @an_addr: announcement mac address (ARP Sender HW address)
+ * @backbone_addr: originator address of the sender (Ethernet source MAC)
+ * @vid: the VLAN ID of the frame
+ *
+ * Return: 1 if handled
+ */
static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
u8 *backbone_addr, unsigned short vid)
{
@@ -716,11 +782,20 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
}
}
- batadv_backbone_gw_free_ref(backbone_gw);
+ batadv_backbone_gw_put(backbone_gw);
return 1;
}
-/* check for REQUEST frame, return 1 if handled */
+/**
+ * batadv_handle_request - check for REQUEST frame
+ * @bat_priv: the bat priv with all the soft interface information
+ * @primary_if: the primary hard interface of this batman soft interface
+ * @backbone_addr: backbone address to be requested (ARP sender HW MAC)
+ * @ethhdr: ethernet header of a packet
+ * @vid: the VLAN ID of the frame
+ *
+ * Return: 1 if handled
+ */
static int batadv_handle_request(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
u8 *backbone_addr, struct ethhdr *ethhdr,
@@ -744,7 +819,16 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
return 1;
}
-/* check for UNCLAIM frame, return 1 if handled */
+/**
+ * batadv_handle_unclaim - check for UNCLAIM frame
+ * @bat_priv: the bat priv with all the soft interface information
+ * @primary_if: the primary hard interface of this batman soft interface
+ * @backbone_addr: originator address of the backbone (Ethernet source)
+ * @claim_addr: Client to be unclaimed (ARP sender HW MAC)
+ * @vid: the VLAN ID of the frame
+ *
+ * Return: 1 if handled
+ */
static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
u8 *backbone_addr, u8 *claim_addr,
@@ -769,11 +853,20 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig);
batadv_bla_del_claim(bat_priv, claim_addr, vid);
- batadv_backbone_gw_free_ref(backbone_gw);
+ batadv_backbone_gw_put(backbone_gw);
return 1;
}
-/* check for CLAIM frame, return 1 if handled */
+/**
+ * batadv_handle_claim - check for CLAIM frame
+ * @bat_priv: the bat priv with all the soft interface information
+ * @primary_if: the primary hard interface of this batman soft interface
+ * @backbone_addr: originator address of the backbone (Ethernet Source)
+ * @claim_addr: client mac address to be claimed (ARP sender HW MAC)
+ * @vid: the VLAN ID of the frame
+ *
+ * Return: 1 if handled
+ */
static int batadv_handle_claim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
u8 *backbone_addr, u8 *claim_addr,
@@ -797,12 +890,12 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
/* TODO: we could call something like tt_local_del() here. */
- batadv_backbone_gw_free_ref(backbone_gw);
+ batadv_backbone_gw_put(backbone_gw);
return 1;
}
/**
- * batadv_check_claim_group
+ * batadv_check_claim_group - check for claim group membership
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the primary interface of this batman interface
* @hw_src: the Hardware source in the ARP Header
@@ -813,7 +906,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
* This function also applies the group ID of the sender
* if it is in the same mesh.
*
- * returns:
+ * Return:
* 2 - if it is a claim packet and on the same group
* 1 - if is a claim packet from another group
* 0 - if it is not a claim packet
@@ -871,20 +964,18 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
bla_dst_own->group = bla_dst->group;
}
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return 2;
}
/**
- * batadv_bla_process_claim
+ * batadv_bla_process_claim - Check if this is a claim frame, and process it
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the primary hard interface of this batman soft interface
* @skb: the frame to be checked
*
- * Check if this is a claim frame, and process it accordingly.
- *
- * returns 1 if it was a claim frame, otherwise return 0 to
+ * Return: 1 if it was a claim frame, otherwise return 0 to
* tell the callee that it can use the frame on its own.
*/
static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
@@ -1015,7 +1106,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
return 1;
}
-/* Check when we last heard from other nodes, and remove them in case of
+/**
+ * batadv_bla_purge_backbone_gw - Remove backbone gateways after a timeout or
+ * immediately
+ * @bat_priv: the bat priv with all the soft interface information
+ * @now: whether the whole hash shall be wiped now
+ *
+ * Check when we last heard from other nodes, and remove them in case of
* a time out, or clean all backbone gws if now is set.
*/
static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
@@ -1056,14 +1153,14 @@ purge_now:
batadv_bla_del_backbone_claims(backbone_gw);
hlist_del_rcu(&backbone_gw->hash_entry);
- batadv_backbone_gw_free_ref(backbone_gw);
+ batadv_backbone_gw_put(backbone_gw);
}
spin_unlock_bh(list_lock);
}
}
/**
- * batadv_bla_purge_claims
+ * batadv_bla_purge_claims - Remove claims after a timeout or immediately
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the selected primary interface, may be NULL if now is set
* @now: whether the whole hash shall be wiped now
@@ -1112,12 +1209,11 @@ purge_now:
}
/**
- * batadv_bla_update_orig_address
+ * batadv_bla_update_orig_address - Update the backbone gateways when the own
+ * originator address changes
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the new selected primary_if
* @oldif: the old primary interface, may be NULL
- *
- * Update the backbone gateways when the own orig address changes.
*/
void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
@@ -1185,10 +1281,14 @@ void batadv_bla_status_update(struct net_device *net_dev)
* so just call that one.
*/
batadv_bla_update_orig_address(bat_priv, primary_if, primary_if);
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
-/* periodic work to do:
+/**
+ * batadv_bla_periodic_work - performs periodic bla work
+ * @work: kernel work struct
+ *
+ * periodic work to do:
* * purge structures when they are too old
* * send announcements
*/
@@ -1255,7 +1355,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
}
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
@@ -1269,7 +1369,12 @@ out:
static struct lock_class_key batadv_claim_hash_lock_class_key;
static struct lock_class_key batadv_backbone_hash_lock_class_key;
-/* initialize all bla structures */
+/**
+ * batadv_bla_init - initialize all bla structures
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success, < 0 on error.
+ */
int batadv_bla_init(struct batadv_priv *bat_priv)
{
int i;
@@ -1289,7 +1394,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
if (primary_if) {
crc = crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN);
bat_priv->bla.claim_dest.group = htons(crc);
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
} else {
bat_priv->bla.claim_dest.group = 0; /* will be set later */
}
@@ -1324,7 +1429,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_bla_check_bcast_duplist
+ * batadv_bla_check_bcast_duplist - Check if a frame is in the broadcast dup.
* @bat_priv: the bat priv with all the soft interface information
* @skb: contains the bcast_packet to be checked
*
@@ -1336,6 +1441,8 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
* with a good chance that it is the same packet. If it is furthermore
* sent by another host, drop it. We allow equal packets from
* the same host however as this might be intended.
+ *
+ * Return: 1 if a packet is in the duplicate list, 0 otherwise.
*/
int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
struct sk_buff *skb)
@@ -1394,14 +1501,13 @@ out:
}
/**
- * batadv_bla_is_backbone_gw_orig
+ * batadv_bla_is_backbone_gw_orig - Check if the originator is a gateway for
+ * the VLAN identified by vid.
* @bat_priv: the bat priv with all the soft interface information
* @orig: originator mac address
* @vid: VLAN identifier
*
- * Check if the originator is a gateway for the VLAN identified by vid.
- *
- * Returns true if orig is a backbone for this vid, false otherwise.
+ * Return: true if orig is a backbone for this vid, false otherwise.
*/
bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
unsigned short vid)
@@ -1435,14 +1541,13 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
}
/**
- * batadv_bla_is_backbone_gw
+ * batadv_bla_is_backbone_gw - check if originator is a backbone gw for a VLAN.
* @skb: the frame to be checked
* @orig_node: the orig_node of the frame
* @hdr_size: maximum length of the frame
*
- * bla_is_backbone_gw inspects the skb for the VLAN ID and returns 1
- * if the orig_node is also a gateway on the soft interface, otherwise it
- * returns 0.
+ * Return: 1 if the orig_node is also a gateway on the soft interface, otherwise
+ * it returns 0.
*/
int batadv_bla_is_backbone_gw(struct sk_buff *skb,
struct batadv_orig_node *orig_node, int hdr_size)
@@ -1465,11 +1570,16 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
if (!backbone_gw)
return 0;
- batadv_backbone_gw_free_ref(backbone_gw);
+ batadv_backbone_gw_put(backbone_gw);
return 1;
}
-/* free all bla structures (for softinterface free or module unload) */
+/**
+ * batadv_bla_init - free all bla structures
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * for softinterface free or module unload
+ */
void batadv_bla_free(struct batadv_priv *bat_priv)
{
struct batadv_hard_iface *primary_if;
@@ -1488,22 +1598,23 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
bat_priv->bla.backbone_hash = NULL;
}
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
- * batadv_bla_rx
+ * batadv_bla_rx - check packets coming from the mesh.
* @bat_priv: the bat priv with all the soft interface information
* @skb: the frame to be checked
* @vid: the VLAN ID of the frame
* @is_bcast: the packet came in a broadcast packet type.
*
- * bla_rx avoidance checks if:
+ * batadv_bla_rx avoidance checks if:
* * we have to race for a claim
* * if the frame is allowed on the LAN
*
- * in these cases, the skb is further handled by this function and
- * returns 1, otherwise it returns 0 and the caller shall further
+ * in these cases, the skb is further handled by this function
+ *
+ * Return: 1 if handled, otherwise it returns 0 and the caller shall further
* process the skb.
*/
int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
@@ -1580,27 +1691,28 @@ handled:
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (claim)
- batadv_claim_free_ref(claim);
+ batadv_claim_put(claim);
return ret;
}
/**
- * batadv_bla_tx
+ * batadv_bla_tx - check packets going into the mesh
* @bat_priv: the bat priv with all the soft interface information
* @skb: the frame to be checked
* @vid: the VLAN ID of the frame
*
- * bla_tx checks if:
+ * batadv_bla_tx checks if:
* * a claim was received which has to be processed
* * the frame is allowed on the mesh
*
- * in these cases, the skb is further handled by this function and
- * returns 1, otherwise it returns 0 and the caller shall further
- * process the skb.
+ * in these cases, the skb is further handled by this function.
*
* This call might reallocate skb data.
+ *
+ * Return: 1 if handled, otherwise it returns 0 and the caller shall further
+ * process the skb.
*/
int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid)
@@ -1668,12 +1780,19 @@ handled:
ret = 1;
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (claim)
- batadv_claim_free_ref(claim);
+ batadv_claim_put(claim);
return ret;
}
+/**
+ * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1719,10 +1838,18 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
}
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return 0;
}
+/**
+ * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
+ * file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1776,6 +1903,6 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
}
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return 0;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 7ea199b8b5ab..579f0fa6fe6a 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 037ad0a5f485..48253cf8341b 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -281,6 +281,8 @@ static int batadv_originators_open(struct inode *inode, struct file *file)
* originator table of an hard interface
* @inode: inode pointer to debugfs file
* @file: pointer to the seq_file
+ *
+ * Return: 0 on success or negative error number in case of failure
*/
static int batadv_originators_hardif_open(struct inode *inode,
struct file *file)
@@ -329,6 +331,8 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
* batadv_dat_cache_open - Prepare file handler for reads from dat_chache
* @inode: inode which was opened
* @file: file handle to be initialized
+ *
+ * Return: 0 on success or negative error number in case of failure
*/
static int batadv_dat_cache_open(struct inode *inode, struct file *file)
{
@@ -483,6 +487,8 @@ void batadv_debugfs_destroy(void)
* batadv_debugfs_add_hardif - creates the base directory for a hard interface
* in debugfs.
* @hard_iface: hard interface which should be added.
+ *
+ * Return: 0 on success or negative error number in case of failure
*/
int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
{
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 80ab8d6f0ab3..1ab4e2e63afc 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a49c705fb86b..4c9b69d465a6 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -30,6 +30,7 @@
#include <linux/in.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -62,21 +63,34 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly
- * free it
- * @dat_entry: the entry to free
+ * batadv_dat_entry_release - release dat_entry from lists and queue for free
+ * after rcu grace period
+ * @ref: kref pointer of the dat_entry
*/
-static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry)
+static void batadv_dat_entry_release(struct kref *ref)
{
- if (atomic_dec_and_test(&dat_entry->refcount))
- kfree_rcu(dat_entry, rcu);
+ struct batadv_dat_entry *dat_entry;
+
+ dat_entry = container_of(ref, struct batadv_dat_entry, refcount);
+
+ kfree_rcu(dat_entry, rcu);
+}
+
+/**
+ * batadv_dat_entry_put - decrement the dat_entry refcounter and possibly
+ * release it
+ * @dat_entry: dat_entry to be free'd
+ */
+static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
+{
+ kref_put(&dat_entry->refcount, batadv_dat_entry_release);
}
/**
* batadv_dat_to_purge - check whether a dat_entry has to be purged or not
* @dat_entry: the entry to check
*
- * Returns true if the entry has to be purged now, false otherwise.
+ * Return: true if the entry has to be purged now, false otherwise.
*/
static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
{
@@ -121,7 +135,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
continue;
hlist_del_rcu(&dat_entry->hash_entry);
- batadv_dat_entry_free_ref(dat_entry);
+ batadv_dat_entry_put(dat_entry);
}
spin_unlock_bh(list_lock);
}
@@ -151,7 +165,7 @@ static void batadv_dat_purge(struct work_struct *work)
* @node: node in the local table
* @data2: second object to compare the node to
*
- * Returns 1 if the two entries are the same, 0 otherwise.
+ * Return: 1 if the two entries are the same, 0 otherwise.
*/
static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
{
@@ -166,7 +180,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
- * Returns the value of the hw_src field in the ARP packet.
+ * Return: the value of the hw_src field in the ARP packet.
*/
static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
{
@@ -183,7 +197,7 @@ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
- * Returns the value of the ip_src field in the ARP packet.
+ * Return: the value of the ip_src field in the ARP packet.
*/
static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
{
@@ -195,7 +209,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
- * Returns the value of the hw_dst field in the ARP packet.
+ * Return: the value of the hw_dst field in the ARP packet.
*/
static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
{
@@ -207,7 +221,7 @@ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
- * Returns the value of the ip_dst field in the ARP packet.
+ * Return: the value of the ip_dst field in the ARP packet.
*/
static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
{
@@ -219,7 +233,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
* @data: data to hash
* @size: size of the hash table
*
- * Returns the selected index in the hash table for the given data.
+ * Return: the selected index in the hash table for the given data.
*/
static u32 batadv_hash_dat(const void *data, u32 size)
{
@@ -256,7 +270,7 @@ static u32 batadv_hash_dat(const void *data, u32 size)
* @ip: search key
* @vid: VLAN identifier
*
- * Returns the dat_entry if found, NULL otherwise.
+ * Return: the dat_entry if found, NULL otherwise.
*/
static struct batadv_dat_entry *
batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
@@ -281,7 +295,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
if (dat_entry->ip != ip)
continue;
- if (!atomic_inc_not_zero(&dat_entry->refcount))
+ if (!kref_get_unless_zero(&dat_entry->refcount))
continue;
dat_entry_tmp = dat_entry;
@@ -326,7 +340,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
dat_entry->vid = vid;
ether_addr_copy(dat_entry->mac_addr, mac_addr);
dat_entry->last_update = jiffies;
- atomic_set(&dat_entry->refcount, 2);
+ kref_init(&dat_entry->refcount);
+ kref_get(&dat_entry->refcount);
hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
batadv_hash_dat, dat_entry,
@@ -334,7 +349,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
if (unlikely(hash_added != 0)) {
/* remove the reference for the hash */
- batadv_dat_entry_free_ref(dat_entry);
+ batadv_dat_entry_put(dat_entry);
goto out;
}
@@ -343,7 +358,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
out:
if (dat_entry)
- batadv_dat_entry_free_ref(dat_entry);
+ batadv_dat_entry_put(dat_entry);
}
#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -440,7 +455,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
* @candidate: orig_node under evaluation
* @max_orig_node: last selected candidate
*
- * Returns true if the node has been elected as next candidate or false
+ * Return: true if the node has been elected as next candidate or false
* otherwise.
*/
static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
@@ -527,12 +542,12 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
max_orig_node))
continue;
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!kref_get_unless_zero(&orig_node->refcount))
continue;
max = tmp_max;
if (max_orig_node)
- batadv_orig_node_free_ref(max_orig_node);
+ batadv_orig_node_put(max_orig_node);
max_orig_node = orig_node;
}
rcu_read_unlock();
@@ -558,7 +573,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
* closest values (from the LEFT, with wrap around if needed) then the hash
* value of the key. ip_dst is the key.
*
- * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM.
+ * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM.
*/
static struct batadv_dat_candidate *
batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
@@ -602,7 +617,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
* This function copies the skb with pskb_copy() and is sent as unicast packet
* to each of the selected candidates.
*
- * Returns true if the packet is sent to at least one candidate, false
+ * Return: true if the packet is sent to at least one candidate, false
* otherwise.
*/
static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
@@ -659,9 +674,9 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
ret = true;
}
free_neigh:
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
free_orig:
- batadv_orig_node_free_ref(cand[i].orig_node);
+ batadv_orig_node_put(cand[i].orig_node);
}
out:
@@ -741,6 +756,8 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
/**
* batadv_dat_init - initialise the DAT internals
* @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 in case of success, a negative error code otherwise
*/
int batadv_dat_init(struct batadv_priv *bat_priv)
{
@@ -779,6 +796,8 @@ void batadv_dat_free(struct batadv_priv *bat_priv)
* batadv_dat_cache_seq_print_text - print the local DAT hash table
* @seq: seq file to print on
* @offset: not used
+ *
+ * Return: always 0
*/
int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
{
@@ -821,7 +840,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return 0;
}
@@ -831,7 +850,7 @@ out:
* @skb: packet to analyse
* @hdr_size: size of the possible header before the ARP packet in the skb
*
- * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise.
+ * Return: the ARP type if the skb contains a valid ARP packet, 0 otherwise.
*/
static u16 batadv_arp_get_type(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
@@ -904,8 +923,9 @@ out:
* @skb: the buffer containing the packet to extract the VID from
* @hdr_size: the size of the batman-adv header encapsulating the packet
*
- * If the packet embedded in the skb is vlan tagged this function returns the
- * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned.
+ * Return: If the packet embedded in the skb is vlan tagged this function
+ * returns the VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS
+ * is returned.
*/
static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
{
@@ -930,7 +950,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to check
*
- * Returns true if the message has been sent to the dht candidates, false
+ * Return: true if the message has been sent to the dht candidates, false
* otherwise. In case of a positive return value the message has to be enqueued
* to permit the fallback.
*/
@@ -1009,7 +1029,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
}
out:
if (dat_entry)
- batadv_dat_entry_free_ref(dat_entry);
+ batadv_dat_entry_put(dat_entry);
return ret;
}
@@ -1020,7 +1040,7 @@ out:
* @skb: packet to check
* @hdr_size: size of the encapsulation header
*
- * Returns true if the request has been answered, false otherwise.
+ * Return: true if the request has been answered, false otherwise.
*/
bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
@@ -1089,7 +1109,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
}
out:
if (dat_entry)
- batadv_dat_entry_free_ref(dat_entry);
+ batadv_dat_entry_put(dat_entry);
if (ret)
kfree_skb(skb);
return ret;
@@ -1143,7 +1163,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
* @skb: packet to check
* @hdr_size: size of the encapsulation header
*
- * Returns true if the packet was snooped and consumed by DAT. False if the
+ * Return: true if the packet was snooped and consumed by DAT. False if the
* packet has to be delivered to the interface
*/
bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
@@ -1200,7 +1220,7 @@ out:
* @bat_priv: the bat priv with all the soft interface information
* @forw_packet: the broadcast packet
*
- * Returns true if the node can drop the packet, false otherwise.
+ * Return: true if the node can drop the packet, false otherwise.
*/
bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet)
@@ -1242,6 +1262,6 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
out:
if (dat_entry)
- batadv_dat_entry_free_ref(dat_entry);
+ batadv_dat_entry_put(dat_entry);
return ret;
}
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 26d4a525a798..813ecea96cf9 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 20d9282f895b..adb9c3989add 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
@@ -85,7 +85,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
/**
* batadv_frag_size_limit - maximum possible size of packet to be fragmented
*
- * Returns the maximum size of payload that can be fragmented.
+ * Return: the maximum size of payload that can be fragmented.
*/
static int batadv_frag_size_limit(void)
{
@@ -107,7 +107,7 @@ static int batadv_frag_size_limit(void)
*
* Caller must hold chain->lock.
*
- * Returns true if chain is empty and caller can just insert the new fragment
+ * Return: true if chain is empty and caller can just insert the new fragment
* without searching for the right position.
*/
static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
@@ -136,7 +136,7 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
* Insert a new fragment into the reverse ordered chain in the right table
* entry. The hash table entry is cleared if "old" fragments exist in it.
*
- * Returns true if skb is buffered, false on error. If the chain has all the
+ * Return: true if skb is buffered, false on error. If the chain has all the
* fragments needed to merge the packet, the chain is moved to the passed head
* to avoid locking the chain in the table.
*/
@@ -242,12 +242,11 @@ err:
/**
* batadv_frag_merge_packets - merge a chain of fragments
* @chain: head of chain with fragments
- * @skb: packet with total size of skb after merging
*
* Expand the first skb in the chain and copy the content of the remaining
* skb's into the expanded one. After doing so, clear the chain.
*
- * Returns the merged skb or NULL on error.
+ * Return: the merged skb or NULL on error.
*/
static struct sk_buff *
batadv_frag_merge_packets(struct hlist_head *chain)
@@ -307,6 +306,9 @@ free:
* There are three possible outcomes: 1) Packet is merged: Return true and
* set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
* to NULL; 3) Error: Return false and leave skb as is.
+ *
+ * Return: true when packet is merged or buffered, false when skb is not not
+ * used.
*/
bool batadv_frag_skb_buffer(struct sk_buff **skb,
struct batadv_orig_node *orig_node_src)
@@ -344,7 +346,7 @@ out_err:
* will exceed the MTU towards the next-hop. If so, the fragment is forwarded
* without merging it.
*
- * Returns true if the fragment is consumed/forwarded, false otherwise.
+ * Return: true if the fragment is consumed/forwarded, false otherwise.
*/
bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_hard_iface *recv_if,
@@ -383,9 +385,9 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
out:
if (orig_node_dst)
- batadv_orig_node_free_ref(orig_node_dst);
+ batadv_orig_node_put(orig_node_dst);
if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
return ret;
}
@@ -399,7 +401,7 @@ out:
* passed mtu and the old one with the rest. The new skb contains data from the
* tail of the old skb.
*
- * Returns the new fragment, NULL on error.
+ * Return: the new fragment, NULL on error.
*/
static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
struct batadv_frag_packet *frag_head,
@@ -433,7 +435,7 @@ err:
* @orig_node: final destination of the created fragments
* @neigh_node: next-hop of the created fragments
*
- * Returns true on success, false otherwise.
+ * Return: true on success, false otherwise.
*/
bool batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
@@ -510,7 +512,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
out_err:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return ret;
}
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 8b9877e70b95..9ff77c7ef7c7 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
@@ -42,7 +42,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
* batadv_frag_check_entry - check if a list of fragments has timed out
* @frags_entry: table entry to check
*
- * Returns true if the frags entry has timed out, false otherwise.
+ * Return: true if the frags entry has timed out, false otherwise.
*/
static inline bool
batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry)
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index e6c8382c79ba..c59aff5ccac8 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -28,6 +28,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
@@ -59,12 +60,28 @@
*/
#define BATADV_DHCP_CHADDR_OFFSET 28
-static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node)
+/**
+ * batadv_gw_node_release - release gw_node from lists and queue for free after
+ * rcu grace period
+ * @ref: kref pointer of the gw_node
+ */
+static void batadv_gw_node_release(struct kref *ref)
{
- if (atomic_dec_and_test(&gw_node->refcount)) {
- batadv_orig_node_free_ref(gw_node->orig_node);
- kfree_rcu(gw_node, rcu);
- }
+ struct batadv_gw_node *gw_node;
+
+ gw_node = container_of(ref, struct batadv_gw_node, refcount);
+
+ batadv_orig_node_put(gw_node->orig_node);
+ kfree_rcu(gw_node, rcu);
+}
+
+/**
+ * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it
+ * @gw_node: gateway node to free
+ */
+static void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+{
+ kref_put(&gw_node->refcount, batadv_gw_node_release);
}
static struct batadv_gw_node *
@@ -77,7 +94,7 @@ batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
if (!gw_node)
goto out;
- if (!atomic_inc_not_zero(&gw_node->refcount))
+ if (!kref_get_unless_zero(&gw_node->refcount))
gw_node = NULL;
out:
@@ -100,14 +117,14 @@ batadv_gw_get_selected_orig(struct batadv_priv *bat_priv)
if (!orig_node)
goto unlock;
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!kref_get_unless_zero(&orig_node->refcount))
orig_node = NULL;
unlock:
rcu_read_unlock();
out:
if (gw_node)
- batadv_gw_node_free_ref(gw_node);
+ batadv_gw_node_put(gw_node);
return orig_node;
}
@@ -118,14 +135,14 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
spin_lock_bh(&bat_priv->gw.list_lock);
- if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount))
+ if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount))
new_gw_node = NULL;
curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node);
if (curr_gw_node)
- batadv_gw_node_free_ref(curr_gw_node);
+ batadv_gw_node_put(curr_gw_node);
spin_unlock_bh(&bat_priv->gw.list_lock);
}
@@ -170,7 +187,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (!router_ifinfo)
goto next;
- if (!atomic_inc_not_zero(&gw_node->refcount))
+ if (!kref_get_unless_zero(&gw_node->refcount))
goto next;
tq_avg = router_ifinfo->bat_iv.tq_avg;
@@ -186,9 +203,9 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
((tmp_gw_factor == max_gw_factor) &&
(tq_avg > max_tq))) {
if (curr_gw)
- batadv_gw_node_free_ref(curr_gw);
+ batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
- atomic_inc(&curr_gw->refcount);
+ kref_get(&curr_gw->refcount);
}
break;
@@ -201,9 +218,9 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
*/
if (tq_avg > max_tq) {
if (curr_gw)
- batadv_gw_node_free_ref(curr_gw);
+ batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
- atomic_inc(&curr_gw->refcount);
+ kref_get(&curr_gw->refcount);
}
break;
}
@@ -214,12 +231,12 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (tmp_gw_factor > max_gw_factor)
max_gw_factor = tmp_gw_factor;
- batadv_gw_node_free_ref(gw_node);
+ batadv_gw_node_put(gw_node);
next:
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
if (router_ifinfo)
- batadv_neigh_ifinfo_free_ref(router_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
rcu_read_unlock();
@@ -255,7 +272,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
*/
batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL);
- batadv_gw_node_free_ref(curr_gw);
+ batadv_gw_node_put(curr_gw);
}
void batadv_gw_election(struct batadv_priv *bat_priv)
@@ -330,13 +347,13 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
out:
if (curr_gw)
- batadv_gw_node_free_ref(curr_gw);
+ batadv_gw_node_put(curr_gw);
if (next_gw)
- batadv_gw_node_free_ref(next_gw);
+ batadv_gw_node_put(next_gw);
if (router)
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
if (router_ifinfo)
- batadv_neigh_ifinfo_free_ref(router_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
void batadv_gw_check_election(struct batadv_priv *bat_priv,
@@ -397,15 +414,15 @@ reselect:
batadv_gw_reselect(bat_priv);
out:
if (curr_gw_orig)
- batadv_orig_node_free_ref(curr_gw_orig);
+ batadv_orig_node_put(curr_gw_orig);
if (router_gw)
- batadv_neigh_node_free_ref(router_gw);
+ batadv_neigh_node_put(router_gw);
if (router_orig)
- batadv_neigh_node_free_ref(router_orig);
+ batadv_neigh_node_put(router_orig);
if (router_gw_tq)
- batadv_neigh_ifinfo_free_ref(router_gw_tq);
+ batadv_neigh_ifinfo_put(router_gw_tq);
if (router_orig_tq)
- batadv_neigh_ifinfo_free_ref(router_orig_tq);
+ batadv_neigh_ifinfo_put(router_orig_tq);
}
/**
@@ -423,12 +440,12 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
if (gateway->bandwidth_down == 0)
return;
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!kref_get_unless_zero(&orig_node->refcount))
return;
gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
if (!gw_node) {
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return;
}
@@ -436,7 +453,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
gw_node->orig_node = orig_node;
gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
- atomic_set(&gw_node->refcount, 1);
+ kref_init(&gw_node->refcount);
spin_lock_bh(&bat_priv->gw.list_lock);
hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
@@ -456,7 +473,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: originator announcing gateway capabilities
*
- * Returns gateway node if found or NULL otherwise.
+ * Return: gateway node if found or NULL otherwise.
*/
static struct batadv_gw_node *
batadv_gw_node_get(struct batadv_priv *bat_priv,
@@ -469,7 +486,7 @@ batadv_gw_node_get(struct batadv_priv *bat_priv,
if (gw_node_tmp->orig_node != orig_node)
continue;
- if (!atomic_inc_not_zero(&gw_node_tmp->refcount))
+ if (!kref_get_unless_zero(&gw_node_tmp->refcount))
continue;
gw_node = gw_node_tmp;
@@ -527,22 +544,23 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
* gets dereferenced.
*/
spin_lock_bh(&bat_priv->gw.list_lock);
- hlist_del_init_rcu(&gw_node->list);
+ if (!hlist_unhashed(&gw_node->list)) {
+ hlist_del_init_rcu(&gw_node->list);
+ batadv_gw_node_put(gw_node);
+ }
spin_unlock_bh(&bat_priv->gw.list_lock);
- batadv_gw_node_free_ref(gw_node);
-
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
if (gw_node == curr_gw)
batadv_gw_reselect(bat_priv);
if (curr_gw)
- batadv_gw_node_free_ref(curr_gw);
+ batadv_gw_node_put(curr_gw);
}
out:
if (gw_node)
- batadv_gw_node_free_ref(gw_node);
+ batadv_gw_node_put(gw_node);
}
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
@@ -565,7 +583,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
hlist_for_each_entry_safe(gw_node, node_tmp,
&bat_priv->gw.list, list) {
hlist_del_init_rcu(&gw_node->list);
- batadv_gw_node_free_ref(gw_node);
+ batadv_gw_node_put(gw_node);
}
spin_unlock_bh(&bat_priv->gw.list_lock);
}
@@ -602,12 +620,12 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
ret = seq_has_overflowed(seq) ? -1 : 0;
if (curr_gw)
- batadv_gw_node_free_ref(curr_gw);
+ batadv_gw_node_put(curr_gw);
out:
if (router_ifinfo)
- batadv_neigh_ifinfo_free_ref(router_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
if (router)
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
return ret;
}
@@ -644,7 +662,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return 0;
}
@@ -655,13 +673,13 @@ out:
* @chaddr: buffer where the client address will be stored. Valid
* only if the function returns BATADV_DHCP_TO_CLIENT
*
- * Returns:
+ * This function may re-allocate the data buffer of the skb passed as argument.
+ *
+ * Return:
* - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error
* while parsing it
* - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server
* - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client
- *
- * This function may re-allocate the data buffer of the skb passed as argument.
*/
enum batadv_dhcp_recipient
batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
@@ -776,11 +794,11 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
* server. Due to topology changes it may be the case that the GW server
* previously selected is not the best one anymore.
*
- * Returns true if the packet destination is unicast and it is not the best gw,
- * false otherwise.
- *
* This call might reallocate skb data.
* Must be invoked only when the DHCP packet is going TO a DHCP SERVER.
+ *
+ * Return: true if the packet destination is unicast and it is not the best gw,
+ * false otherwise.
*/
bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
struct sk_buff *skb)
@@ -838,7 +856,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
goto out;
curr_tq_avg = curr_ifinfo->bat_iv.tq_avg;
- batadv_neigh_ifinfo_free_ref(curr_ifinfo);
+ batadv_neigh_ifinfo_put(curr_ifinfo);
break;
case BATADV_GW_MODE_OFF:
@@ -856,18 +874,18 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
if ((curr_tq_avg - old_ifinfo->bat_iv.tq_avg) > BATADV_GW_THRESHOLD)
out_of_range = true;
- batadv_neigh_ifinfo_free_ref(old_ifinfo);
+ batadv_neigh_ifinfo_put(old_ifinfo);
out:
if (orig_dst_node)
- batadv_orig_node_free_ref(orig_dst_node);
+ batadv_orig_node_put(orig_dst_node);
if (curr_gw)
- batadv_gw_node_free_ref(curr_gw);
+ batadv_gw_node_put(curr_gw);
if (gw_node)
- batadv_gw_node_free_ref(gw_node);
+ batadv_gw_node_put(gw_node);
if (neigh_old)
- batadv_neigh_node_free_ref(neigh_old);
+ batadv_neigh_node_put(neigh_old);
if (neigh_curr)
- batadv_neigh_node_free_ref(neigh_curr);
+ batadv_neigh_node_put(neigh_curr);
return out_of_range;
}
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index fa9527785ed3..582dd8c413c8 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b51bface8bdd..5ee04f7140af 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -38,7 +38,7 @@
* @description: text shown when throughput string cannot be parsed
* @throughput: pointer holding the returned throughput information
*
- * Returns false on parse error and true otherwise.
+ * Return: false on parse error and true otherwise.
*/
static bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
const char *description, u32 *throughput)
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index ab893e318229..b58346350024 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 01acccc4d218..b22b2775a0a5 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -18,6 +18,7 @@
#include "hard-interface.h"
#include "main.h"
+#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
@@ -26,6 +27,7 @@
#include <linux/if_ether.h>
#include <linux/if.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
@@ -47,13 +49,19 @@
#include "sysfs.h"
#include "translation-table.h"
-void batadv_hardif_free_rcu(struct rcu_head *rcu)
+/**
+ * batadv_hardif_release - release hard interface from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the hard interface
+ */
+void batadv_hardif_release(struct kref *ref)
{
struct batadv_hard_iface *hard_iface;
- hard_iface = container_of(rcu, struct batadv_hard_iface, rcu);
+ hard_iface = container_of(ref, struct batadv_hard_iface, refcount);
dev_put(hard_iface->net_dev);
- kfree(hard_iface);
+
+ kfree_rcu(hard_iface, rcu);
}
struct batadv_hard_iface *
@@ -64,7 +72,7 @@ batadv_hardif_get_by_netdev(const struct net_device *net_dev)
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
if (hard_iface->net_dev == net_dev &&
- atomic_inc_not_zero(&hard_iface->refcount))
+ kref_get_unless_zero(&hard_iface->refcount))
goto out;
}
@@ -76,6 +84,28 @@ out:
}
/**
+ * batadv_mutual_parents - check if two devices are each others parent
+ * @dev1: 1st net_device
+ * @dev2: 2nd net_device
+ *
+ * veth devices come in pairs and each is the parent of the other!
+ *
+ * Return: true if the devices are each others parent, otherwise false
+ */
+static bool batadv_mutual_parents(const struct net_device *dev1,
+ const struct net_device *dev2)
+{
+ int dev1_parent_iflink = dev_get_iflink(dev1);
+ int dev2_parent_iflink = dev_get_iflink(dev2);
+
+ if (!dev1_parent_iflink || !dev2_parent_iflink)
+ return false;
+
+ return (dev1_parent_iflink == dev2->ifindex) &&
+ (dev2_parent_iflink == dev1->ifindex);
+}
+
+/**
* batadv_is_on_batman_iface - check if a device is a batman iface descendant
* @net_dev: the device to check
*
@@ -85,7 +115,7 @@ out:
* This function recursively checks all the fathers of the device passed as
* argument looking for a batman-adv soft interface.
*
- * Returns true if the device is descendant of a batman-adv mesh interface (or
+ * Return: true if the device is descendant of a batman-adv mesh interface (or
* if it is a batman-adv interface itself), false otherwise
*/
static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
@@ -108,6 +138,9 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
if (WARN(!parent_dev, "Cannot find parent device"))
return false;
+ if (batadv_mutual_parents(net_dev, parent_dev))
+ return false;
+
ret = batadv_is_on_batman_iface(parent_dev);
return ret;
@@ -136,7 +169,7 @@ static int batadv_is_valid_iface(const struct net_device *net_dev)
* interface
* @net_device: the device to check
*
- * Returns true if the net device is a 802.11 wireless device, false otherwise.
+ * Return: true if the net device is a 802.11 wireless device, false otherwise.
*/
bool batadv_is_wifi_netdev(struct net_device *net_device)
{
@@ -169,7 +202,7 @@ batadv_hardif_get_active(const struct net_device *soft_iface)
continue;
if (hard_iface->if_status == BATADV_IF_ACTIVE &&
- atomic_inc_not_zero(&hard_iface->refcount))
+ kref_get_unless_zero(&hard_iface->refcount))
goto out;
}
@@ -193,7 +226,7 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
batadv_bla_update_orig_address(bat_priv, primary_if, oldif);
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
static void batadv_primary_if_select(struct batadv_priv *bat_priv,
@@ -203,7 +236,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
ASSERT_RTNL();
- if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount))
+ if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount))
new_hard_iface = NULL;
curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1);
@@ -217,7 +250,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
out:
if (curr_hard_iface)
- batadv_hardif_free_ref(curr_hard_iface);
+ batadv_hardif_put(curr_hard_iface);
}
static bool
@@ -376,7 +409,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
static void
@@ -401,7 +434,8 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
*
* Invoke ndo_del_slave on master passing slave as argument. In this way slave
* is free'd and master can correctly change its internal state.
- * Return 0 on success, a negative value representing the error otherwise
+ *
+ * Return: 0 on success, a negative value representing the error otherwise
*/
static int batadv_master_del_slave(struct batadv_hard_iface *slave,
struct net_device *master)
@@ -430,7 +464,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
goto out;
- if (!atomic_inc_not_zero(&hard_iface->refcount))
+ if (!kref_get_unless_zero(&hard_iface->refcount))
goto out;
soft_iface = dev_get_by_name(&init_net, iface_name);
@@ -528,7 +562,7 @@ err_dev:
hard_iface->soft_iface = NULL;
dev_put(soft_iface);
err:
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
return ret;
}
@@ -559,7 +593,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_primary_if_select(bat_priv, new_if);
if (new_if)
- batadv_hardif_free_ref(new_if);
+ batadv_hardif_put(new_if);
}
bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
@@ -582,11 +616,11 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
}
hard_iface->soft_iface = NULL;
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
@@ -605,7 +639,7 @@ static void batadv_hardif_remove_interface_finish(struct work_struct *work)
batadv_debugfs_del_hardif(hard_iface);
batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
}
static struct batadv_hard_iface *
@@ -651,7 +685,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
/* extra reference for return */
- atomic_set(&hard_iface->refcount, 2);
+ kref_init(&hard_iface->refcount);
+ kref_get(&hard_iface->refcount);
batadv_check_known_mac_addr(hard_iface->net_dev);
list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
@@ -759,10 +794,10 @@ static int batadv_hard_if_event(struct notifier_block *this,
}
hardif_put:
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return NOTIFY_DONE;
}
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 5a31420513e1..d74f1983f33e 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -20,8 +20,8 @@
#include "main.h"
-#include <linux/atomic.h>
#include <linux/compiler.h>
+#include <linux/kref.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/stddef.h>
@@ -61,30 +61,16 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
void batadv_hardif_remove_interfaces(void);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
-void batadv_hardif_free_rcu(struct rcu_head *rcu);
+void batadv_hardif_release(struct kref *ref);
/**
- * batadv_hardif_free_ref - decrement the hard interface refcounter and
- * possibly free it
+ * batadv_hardif_put - decrement the hard interface refcounter and possibly
+ * release it
* @hard_iface: the hard interface to free
*/
-static inline void
-batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
+static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
{
- if (atomic_dec_and_test(&hard_iface->refcount))
- call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu);
-}
-
-/**
- * batadv_hardif_free_ref_now - decrement the hard interface refcounter and
- * possibly free it (without rcu callback)
- * @hard_iface: the hard interface to free
- */
-static inline void
-batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface)
-{
- if (atomic_dec_and_test(&hard_iface->refcount))
- batadv_hardif_free_rcu(&hard_iface->rcu);
+ kref_put(&hard_iface->refcount, batadv_hardif_release);
}
static inline struct batadv_hard_iface *
@@ -97,7 +83,7 @@ batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
if (!hard_iface)
goto out;
- if (!atomic_inc_not_zero(&hard_iface->refcount))
+ if (!kref_get_unless_zero(&hard_iface->refcount))
hard_iface = NULL;
out:
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 2ea6a18d793f..a0a0fdb85805 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 377626250ac7..9bb57b87447c 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
@@ -30,14 +30,17 @@
struct lock_class_key;
/* callback to a compare function. should compare 2 element datas for their
- * keys, return 0 if same and not 0 if not same
+ * keys
+ *
+ * Return: 0 if same and not 0 if not same
*/
typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *,
const void *);
-/* the hashfunction, should return an index
- * based on the key in the data of the first
- * argument and the size the second
+/* the hashfunction
+ *
+ * Return: an index based on the key in the data of the first argument and the
+ * size the second
*/
typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32);
typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *);
@@ -96,7 +99,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash,
* @data: data passed to the aforementioned callbacks as argument
* @data_node: to be added element
*
- * Returns 0 on success, 1 if the element already is in the hash
+ * Return: 0 on success, 1 if the element already is in the hash
* and -1 on error.
*/
static inline int batadv_hash_add(struct batadv_hashtable *hash,
@@ -139,10 +142,11 @@ out:
return ret;
}
-/* removes data from hash, if found. returns pointer do data on success, so you
- * can remove the used structure yourself, or NULL on error . data could be the
- * structure you use with just the key filled, we just need the key for
- * comparing.
+/* removes data from hash, if found. data could be the structure you use with
+ * just the key filled, we just need the key for comparing.
+ *
+ * Return: returns pointer do data on success, so you can remove the used
+ * structure yourself, or NULL on error
*/
static inline void *batadv_hash_remove(struct batadv_hashtable *hash,
batadv_hashdata_compare_cb compare,
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bcabb5e3f4d3..6268f08b7154 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -288,11 +288,11 @@ free_skb:
kfree_skb(skb);
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return len;
}
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index e937143f0b10..618d5de06f20 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 4b5d61fbadb1..e3d7051747b0 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -29,6 +29,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/module.h>
@@ -233,7 +234,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
* @bat_priv: the bat priv with all the soft interface information
* @addr: the address to check
*
- * Returns 'true' if the mac address was found, false otherwise.
+ * Return: 'true' if the mac address was found, false otherwise.
*/
bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
{
@@ -262,7 +263,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
* function that requires the primary interface
* @seq: debugfs table seq_file struct
*
- * Returns primary interface if found or NULL otherwise.
+ * Return: primary interface if found or NULL otherwise.
*/
struct batadv_hard_iface *
batadv_seq_print_text_primary_if_get(struct seq_file *seq)
@@ -286,7 +287,7 @@ batadv_seq_print_text_primary_if_get(struct seq_file *seq)
seq_printf(seq,
"BATMAN mesh %s disabled - primary interface not active\n",
net_dev->name);
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
primary_if = NULL;
out:
@@ -297,7 +298,7 @@ out:
* batadv_max_header_len - calculate maximum encapsulation overhead for a
* payload packet
*
- * Return the maximum encapsulation overhead in bytes.
+ * Return: the maximum encapsulation overhead in bytes.
*/
int batadv_max_header_len(void)
{
@@ -599,6 +600,8 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
*
* payload_ptr must always point to an address in the skb head buffer and not to
* a fragment.
+ *
+ * Return: big endian crc32c of the checksummed data
*/
__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
{
@@ -622,15 +625,26 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
}
/**
- * batadv_tvlv_handler_free_ref - decrement the tvlv handler refcounter and
- * possibly free it
+ * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the tvlv
+ */
+static void batadv_tvlv_handler_release(struct kref *ref)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
+ kfree_rcu(tvlv_handler, rcu);
+}
+
+/**
+ * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
+ * possibly release it
* @tvlv_handler: the tvlv handler to free
*/
-static void
-batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler)
+static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
{
- if (atomic_dec_and_test(&tvlv_handler->refcount))
- kfree_rcu(tvlv_handler, rcu);
+ kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
}
/**
@@ -640,7 +654,7 @@ batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler)
* @type: tvlv handler type to look for
* @version: tvlv handler version to look for
*
- * Returns tvlv handler if found or NULL otherwise.
+ * Return: tvlv handler if found or NULL otherwise.
*/
static struct batadv_tvlv_handler
*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
@@ -656,7 +670,7 @@ static struct batadv_tvlv_handler
if (tvlv_handler_tmp->version != version)
continue;
- if (!atomic_inc_not_zero(&tvlv_handler_tmp->refcount))
+ if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
continue;
tvlv_handler = tvlv_handler_tmp;
@@ -668,14 +682,25 @@ static struct batadv_tvlv_handler
}
/**
- * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and
- * possibly free it
+ * batadv_tvlv_container_release - release tvlv from lists and free
+ * @ref: kref pointer of the tvlv
+ */
+static void batadv_tvlv_container_release(struct kref *ref)
+{
+ struct batadv_tvlv_container *tvlv;
+
+ tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
+ kfree(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_put - decrement the tvlv container refcounter and
+ * possibly release it
* @tvlv: the tvlv container to free
*/
-static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv)
+static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
{
- if (atomic_dec_and_test(&tvlv->refcount))
- kfree(tvlv);
+ kref_put(&tvlv->refcount, batadv_tvlv_container_release);
}
/**
@@ -688,13 +713,15 @@ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv)
* Has to be called with the appropriate locks being acquired
* (tvlv.container_list_lock).
*
- * Returns tvlv container if found or NULL otherwise.
+ * Return: tvlv container if found or NULL otherwise.
*/
static struct batadv_tvlv_container
*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
{
struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
if (tvlv_tmp->tvlv_hdr.type != type)
continue;
@@ -702,7 +729,7 @@ static struct batadv_tvlv_container
if (tvlv_tmp->tvlv_hdr.version != version)
continue;
- if (!atomic_inc_not_zero(&tvlv_tmp->refcount))
+ if (!kref_get_unless_zero(&tvlv_tmp->refcount))
continue;
tvlv = tvlv_tmp;
@@ -720,13 +747,15 @@ static struct batadv_tvlv_container
* Has to be called with the appropriate locks being acquired
* (tvlv.container_list_lock).
*
- * Returns size of all currently registered tvlv containers in bytes.
+ * Return: size of all currently registered tvlv containers in bytes.
*/
static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
{
struct batadv_tvlv_container *tvlv;
u16 tvlv_len = 0;
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
tvlv_len += sizeof(struct batadv_tvlv_hdr);
tvlv_len += ntohs(tvlv->tvlv_hdr.len);
@@ -755,8 +784,8 @@ static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
hlist_del(&tvlv->list);
/* first call to decrement the counter, second call to free */
- batadv_tvlv_container_free_ref(tvlv);
- batadv_tvlv_container_free_ref(tvlv);
+ batadv_tvlv_container_put(tvlv);
+ batadv_tvlv_container_put(tvlv);
}
/**
@@ -808,7 +837,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
INIT_HLIST_NODE(&tvlv_new->list);
- atomic_set(&tvlv_new->refcount, 1);
+ kref_init(&tvlv_new->refcount);
spin_lock_bh(&bat_priv->tvlv.container_list_lock);
tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
@@ -826,7 +855,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
* @additional_packet_len: requested additional packet size on top of minimum
* size
*
- * Returns true of the packet buffer could be changed to the requested size,
+ * Return: true of the packet buffer could be changed to the requested size,
* false otherwise.
*/
static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
@@ -862,7 +891,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
* The ogm packet might be enlarged or shrunk depending on the current size
* and the size of the to-be-appended tvlv containers.
*
- * Returns size of all appended tvlv containers in bytes.
+ * Return: size of all appended tvlv containers in bytes.
*/
u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
unsigned char **packet_buff,
@@ -915,7 +944,7 @@ end:
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*
- * Returns success if handler was not found or the return value of the handler
+ * Return: success if handler was not found or the return value of the handler
* callback.
*/
static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
@@ -968,7 +997,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*
- * Returns success when processing an OGM or the return value of all called
+ * Return: success when processing an OGM or the return value of all called
* handler callbacks.
*/
int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
@@ -1001,7 +1030,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
src, dst, tvlv_value,
tvlv_value_cont_len);
if (tvlv_handler)
- batadv_tvlv_handler_free_ref(tvlv_handler);
+ batadv_tvlv_handler_put(tvlv_handler);
tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
tvlv_value_len -= tvlv_value_cont_len;
}
@@ -1081,7 +1110,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
if (tvlv_handler) {
- batadv_tvlv_handler_free_ref(tvlv_handler);
+ batadv_tvlv_handler_put(tvlv_handler);
return;
}
@@ -1094,7 +1123,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
tvlv_handler->type = type;
tvlv_handler->version = version;
tvlv_handler->flags = flags;
- atomic_set(&tvlv_handler->refcount, 1);
+ kref_init(&tvlv_handler->refcount);
INIT_HLIST_NODE(&tvlv_handler->list);
spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
@@ -1118,11 +1147,11 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
if (!tvlv_handler)
return;
- batadv_tvlv_handler_free_ref(tvlv_handler);
+ batadv_tvlv_handler_put(tvlv_handler);
spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
hlist_del_rcu(&tvlv_handler->list);
spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
- batadv_tvlv_handler_free_ref(tvlv_handler);
+ batadv_tvlv_handler_put(tvlv_handler);
}
/**
@@ -1182,7 +1211,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
kfree_skb(skb);
out:
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
}
/**
@@ -1190,8 +1219,8 @@ out:
* @skb: the buffer containing the packet
* @header_len: length of the batman header preceding the ethernet header
*
- * If the packet embedded in the skb is vlan tagged this function returns the
- * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned.
+ * Return: VID with the BATADV_VLAN_HAS_TAG flag when the packet embedded in the
+ * skb is vlan tagged. Otherwise BATADV_NO_FLAGS.
*/
unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
{
@@ -1218,7 +1247,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
* @vid: the VLAN identifier for which the AP isolation attributed as to be
* looked up
*
- * Returns true if AP isolation is on for the VLAN idenfied by vid, false
+ * Return: true if AP isolation is on for the VLAN idenfied by vid, false
* otherwise
*/
bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
@@ -1232,7 +1261,7 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
vlan = batadv_softif_vlan_get(bat_priv, vid);
if (vlan) {
ap_isolation_enabled = atomic_read(&vlan->ap_isolation);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
}
return ap_isolation_enabled;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 9dbd9107e7e1..8c01f54c61f3 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -35,6 +35,9 @@
/* Time To Live of broadcast messages */
#define BATADV_TTL 50
+/* maximum sequence number age of broadcast messages */
+#define BATADV_BCAST_MAX_AGE 64
+
/* purge originators after time in seconds if no valid packet comes in
* -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE
*/
@@ -97,11 +100,6 @@
*/
#define BATADV_TQ_SIMILARITY_THRESHOLD 50
-/* how much worse secondary interfaces may be to be considered as bonding
- * candidates
- */
-#define BATADV_BONDING_TQ_THRESHOLD 50
-
/* should not be bigger than 512 bytes or change the size of
* forw_packet->direct_link_flags
*/
@@ -273,9 +271,14 @@ static inline void _batadv_dbg(int type __always_unused,
pr_err("%s: " fmt, _netdev->name, ## arg); \
} while (0)
-/* returns 1 if they are the same ethernet addr
+/**
+ * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses
+ * @data1: Pointer to a six-byte array containing the Ethernet address
+ * @data2: Pointer other six-byte array containing the Ethernet address
*
* note: can't use ether_addr_equal() as it requires aligned memory
+ *
+ * Return: 1 if they are the same ethernet addr
*/
static inline bool batadv_compare_eth(const void *data1, const void *data2)
{
@@ -287,7 +290,7 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2)
* @timestamp: base value to compare with (in jiffies)
* @timeout: added to base value before comparing (in milliseconds)
*
- * Returns true if current time is after timestamp + timeout
+ * Return: true if current time is after timestamp + timeout
*/
static inline bool batadv_has_timed_out(unsigned long timestamp,
unsigned int timeout)
@@ -326,7 +329,13 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
-/* Sum and return the cpu-local counters for index 'idx' */
+/**
+ * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
+ * @bat_priv: the bat priv with all the soft interface information
+ * @idx: index of counter to sum up
+ *
+ * Return: sum of all cpu-local counters
+ */
static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
{
u64 *counters, sum = 0;
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index eb76386f8d4b..8caa2c72efa3 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2016 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -30,6 +30,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -55,7 +56,7 @@
* Collect multicast addresses of the local multicast listeners
* on the given soft interface, dev, in the given mcast_list.
*
- * Returns -ENOMEM on memory allocation error or the number of
+ * Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
static int batadv_mcast_mla_softif_get(struct net_device *dev,
@@ -87,7 +88,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
* @mcast_addr: the multicast address to check
* @mcast_list: the list with multicast addresses to search in
*
- * Returns true if the given address is already in the given list.
+ * Return: true if the given address is already in the given list.
* Otherwise returns false.
*/
static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
@@ -195,8 +196,9 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
* batadv_mcast_has_bridge - check whether the soft-iface is bridged
* @bat_priv: the bat priv with all the soft interface information
*
- * Checks whether there is a bridge on top of our soft interface. Returns
- * true if so, false otherwise.
+ * Checks whether there is a bridge on top of our soft interface.
+ *
+ * Return: true if there is a bridge, false otherwise.
*/
static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
{
@@ -218,7 +220,7 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
* Updates the own multicast tvlv with our current multicast related settings,
* capabilities and inabilities.
*
- * Returns true if the tvlv container is registered afterwards. Otherwise
+ * Return: true if the tvlv container is registered afterwards. Otherwise
* returns false.
*/
static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
@@ -289,8 +291,8 @@ out:
* Checks whether the given IPv4 packet has the potential to be forwarded with a
* mode more optimal than classic flooding.
*
- * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM in case of
- * memory allocation failure.
+ * Return: If so then 0. Otherwise -EINVAL or -ENOMEM in case of memory
+ * allocation failure.
*/
static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
struct sk_buff *skb,
@@ -327,8 +329,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
* Checks whether the given IPv6 packet has the potential to be forwarded with a
* mode more optimal than classic flooding.
*
- * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out
- * of memory.
+ * Return: If so then 0. Otherwise -EINVAL is or -ENOMEM if we are out of memory
*/
static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
struct sk_buff *skb,
@@ -366,8 +367,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
* Checks whether the given multicast ethernet frame has the potential to be
* forwarded with a mode more optimal than classic flooding.
*
- * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out
- * of memory.
+ * Return: If so then 0. Otherwise -EINVAL is or -ENOMEM if we are out of memory
*/
static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
struct sk_buff *skb,
@@ -398,7 +398,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: ethernet header of a packet
*
- * Returns the number of nodes which want all IPv4 multicast traffic if the
+ * Return: the number of nodes which want all IPv4 multicast traffic if the
* given ethhdr is from an IPv4 packet or the number of nodes which want all
* IPv6 traffic if it matches an IPv6 packet.
*/
@@ -421,7 +421,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: the ether header containing the multicast destination
*
- * Returns an orig_node matching the multicast address provided by ethhdr
+ * Return: an orig_node matching the multicast address provided by ethhdr
* via a translation table lookup. This increases the returned nodes refcount.
*/
static struct batadv_orig_node *
@@ -436,7 +436,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
* batadv_mcast_want_forw_ipv4_node_get - get a node with an ipv4 flag
* @bat_priv: the bat priv with all the soft interface information
*
- * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
+ * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
* increases its refcount.
*/
static struct batadv_orig_node *
@@ -448,7 +448,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
hlist_for_each_entry_rcu(tmp_orig_node,
&bat_priv->mcast.want_all_ipv4_list,
mcast_want_all_ipv4_node) {
- if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
+ if (!kref_get_unless_zero(&tmp_orig_node->refcount))
continue;
orig_node = tmp_orig_node;
@@ -463,7 +463,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
* batadv_mcast_want_forw_ipv6_node_get - get a node with an ipv6 flag
* @bat_priv: the bat priv with all the soft interface information
*
- * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
+ * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
* and increases its refcount.
*/
static struct batadv_orig_node *
@@ -475,7 +475,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
hlist_for_each_entry_rcu(tmp_orig_node,
&bat_priv->mcast.want_all_ipv6_list,
mcast_want_all_ipv6_node) {
- if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
+ if (!kref_get_unless_zero(&tmp_orig_node->refcount))
continue;
orig_node = tmp_orig_node;
@@ -491,7 +491,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: an ethernet header to determine the protocol family from
*
- * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
+ * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
* BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and
* increases its refcount.
*/
@@ -514,7 +514,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
* batadv_mcast_want_forw_unsnoop_node_get - get a node with an unsnoopable flag
* @bat_priv: the bat priv with all the soft interface information
*
- * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
+ * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
* set and increases its refcount.
*/
static struct batadv_orig_node *
@@ -526,7 +526,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
hlist_for_each_entry_rcu(tmp_orig_node,
&bat_priv->mcast.want_all_unsnoopables_list,
mcast_want_all_unsnoopables_node) {
- if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
+ if (!kref_get_unless_zero(&tmp_orig_node->refcount))
continue;
orig_node = tmp_orig_node;
@@ -543,7 +543,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
* @skb: The multicast packet to check
* @orig: an originator to be set to forward the skb to
*
- * Returns the forwarding mode as enum batadv_forw_mode and in case of
+ * Return: the forwarding mode as enum batadv_forw_mode and in case of
* BATADV_FORW_SINGLE set the orig to the single originator the skb
* should be forwarded to.
*/
@@ -802,7 +802,9 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
+ spin_lock_bh(&bat_priv->tt.commit_lock);
batadv_mcast_mla_tt_retract(bat_priv, NULL);
+ spin_unlock_bh(&bat_priv->tt.commit_lock);
}
/**
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 8f3cb04b9f13..80bceec55592 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2016 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -23,7 +23,7 @@
struct sk_buff;
/**
- * batadv_forw_mode - the way a packet should be forwarded as
+ * enum batadv_forw_mode - the way a packet should be forwarded as
* @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic
* flooding)
* @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index c98b0ab85449..d253bb23e2ac 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
@@ -32,6 +32,7 @@
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -64,6 +65,8 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
/**
* batadv_nc_init - one-time initialization for network coding
+ *
+ * Return: 0 on success or negative error number in case of failure
*/
int __init batadv_nc_init(void)
{
@@ -142,6 +145,8 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
/**
* batadv_nc_mesh_init - initialise coding hash table and start house keeping
* @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
*/
int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
{
@@ -203,39 +208,52 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
}
/**
- * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove
- * its refcount on the orig_node
- * @rcu: rcu pointer of the nc node
+ * batadv_nc_node_release - release nc_node from lists and queue for free after
+ * rcu grace period
+ * @ref: kref pointer of the nc_node
*/
-static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
+static void batadv_nc_node_release(struct kref *ref)
{
struct batadv_nc_node *nc_node;
- nc_node = container_of(rcu, struct batadv_nc_node, rcu);
- batadv_orig_node_free_ref(nc_node->orig_node);
- kfree(nc_node);
+ nc_node = container_of(ref, struct batadv_nc_node, refcount);
+
+ batadv_orig_node_put(nc_node->orig_node);
+ kfree_rcu(nc_node, rcu);
+}
+
+/**
+ * batadv_nc_node_put - decrement the nc_node refcounter and possibly
+ * release it
+ * @nc_node: nc_node to be free'd
+ */
+static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
+{
+ kref_put(&nc_node->refcount, batadv_nc_node_release);
}
/**
- * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
- * frees it
- * @nc_node: the nc node to free
+ * batadv_nc_path_release - release nc_path from lists and queue for free after
+ * rcu grace period
+ * @ref: kref pointer of the nc_path
*/
-static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
+static void batadv_nc_path_release(struct kref *ref)
{
- if (atomic_dec_and_test(&nc_node->refcount))
- call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
+ struct batadv_nc_path *nc_path;
+
+ nc_path = container_of(ref, struct batadv_nc_path, refcount);
+
+ kfree_rcu(nc_path, rcu);
}
/**
- * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly
- * frees it
- * @nc_path: the nc node to free
+ * batadv_nc_path_put - decrement the nc_path refcounter and possibly
+ * release it
+ * @nc_path: nc_path to be free'd
*/
-static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
+static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
{
- if (atomic_dec_and_test(&nc_path->refcount))
- kfree_rcu(nc_path, rcu);
+ kref_put(&nc_path->refcount, batadv_nc_path_release);
}
/**
@@ -245,7 +263,7 @@ static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
{
kfree_skb(nc_packet->skb);
- batadv_nc_path_free_ref(nc_packet->nc_path);
+ batadv_nc_path_put(nc_packet->nc_path);
kfree(nc_packet);
}
@@ -254,7 +272,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
* @bat_priv: the bat priv with all the soft interface information
* @nc_node: the nc node to check
*
- * Returns true if the entry has to be purged now, false otherwise
+ * Return: true if the entry has to be purged now, false otherwise
*/
static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
struct batadv_nc_node *nc_node)
@@ -270,7 +288,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @nc_path: the nc path to check
*
- * Returns true if the entry has to be purged now, false otherwise
+ * Return: true if the entry has to be purged now, false otherwise
*/
static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
struct batadv_nc_path *nc_path)
@@ -290,7 +308,7 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @nc_path: the nc path to check
*
- * Returns true if the entry has to be purged now, false otherwise
+ * Return: true if the entry has to be purged now, false otherwise
*/
static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
struct batadv_nc_path *nc_path)
@@ -338,7 +356,7 @@ batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
"Removing nc_node %pM -> %pM\n",
nc_node->addr, nc_node->orig_node->orig);
list_del_rcu(&nc_node->list);
- batadv_nc_node_free_ref(nc_node);
+ batadv_nc_node_put(nc_node);
}
spin_unlock_bh(lock);
}
@@ -449,7 +467,7 @@ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
"Remove nc_path %pM -> %pM\n",
nc_path->prev_hop, nc_path->next_hop);
hlist_del_rcu(&nc_path->hash_entry);
- batadv_nc_path_free_ref(nc_path);
+ batadv_nc_path_put(nc_path);
}
spin_unlock_bh(lock);
}
@@ -473,7 +491,7 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
* @data: data to hash
* @size: size of the hash table
*
- * Returns the selected index in the hash table for the given data.
+ * Return: the selected index in the hash table for the given data.
*/
static u32 batadv_nc_hash_choose(const void *data, u32 size)
{
@@ -492,7 +510,7 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size)
* @node: node in the local table
* @data2: second object to compare the node to
*
- * Returns 1 if the two entry are the same, 0 otherwise
+ * Return: 1 if the two entry are the same, 0 otherwise
*/
static int batadv_nc_hash_compare(const struct hlist_node *node,
const void *data2)
@@ -519,7 +537,7 @@ static int batadv_nc_hash_compare(const struct hlist_node *node,
* @hash: hash table containing the nc path
* @data: search key
*
- * Returns the nc_path if found, NULL otherwise.
+ * Return: the nc_path if found, NULL otherwise.
*/
static struct batadv_nc_path *
batadv_nc_hash_find(struct batadv_hashtable *hash,
@@ -540,7 +558,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash,
if (!batadv_nc_hash_compare(&nc_path->hash_entry, data))
continue;
- if (!atomic_inc_not_zero(&nc_path->refcount))
+ if (!kref_get_unless_zero(&nc_path->refcount))
continue;
nc_path_tmp = nc_path;
@@ -574,7 +592,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
* timeout. If so, the packet is no longer kept and the entry deleted from the
* queue. Has to be called with the appropriate locks.
*
- * Returns false as soon as the entry in the fifo queue has not been timed out
+ * Return: false as soon as the entry in the fifo queue has not been timed out
* yet and true otherwise.
*/
static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
@@ -613,7 +631,7 @@ out:
* packet is no longer delayed, immediately sent and the entry deleted from the
* queue. Has to be called with the appropriate locks.
*
- * Returns false as soon as the entry in the fifo queue has not been timed out
+ * Return: false as soon as the entry in the fifo queue has not been timed out
* yet and true otherwise.
*/
static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
@@ -734,7 +752,7 @@ static void batadv_nc_worker(struct work_struct *work)
* @orig_node: neighboring orig node which may be used as nc candidate
* @ogm_packet: incoming ogm packet also used for the checks
*
- * Returns true if:
+ * Return: true if:
* 1) The OGM must have the most recent sequence number.
* 2) The TTL must be decremented by one and only one.
* 3) The OGM must be received from the first hop from orig_node.
@@ -754,7 +772,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
last_ttl = orig_ifinfo->last_ttl;
last_real_seqno = orig_ifinfo->last_real_seqno;
- batadv_orig_ifinfo_free_ref(orig_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
if (last_real_seqno != ntohl(ogm_packet->seqno))
return false;
@@ -775,7 +793,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
* (can be equal to orig_node)
* @in_coding: traverse incoming or outgoing network coding list
*
- * Returns the nc_node if found, NULL otherwise.
+ * Return: the nc_node if found, NULL otherwise.
*/
static struct batadv_nc_node
*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
@@ -796,7 +814,7 @@ static struct batadv_nc_node
if (!batadv_compare_eth(nc_node->addr, orig_node->orig))
continue;
- if (!atomic_inc_not_zero(&nc_node->refcount))
+ if (!kref_get_unless_zero(&nc_node->refcount))
continue;
/* Found a match */
@@ -817,7 +835,7 @@ static struct batadv_nc_node
* (can be equal to orig_node)
* @in_coding: traverse incoming or outgoing network coding list
*
- * Returns the nc_node if found or created, NULL in case of an error.
+ * Return: the nc_node if found or created, NULL in case of an error.
*/
static struct batadv_nc_node
*batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
@@ -840,14 +858,15 @@ static struct batadv_nc_node
if (!nc_node)
return NULL;
- if (!atomic_inc_not_zero(&orig_neigh_node->refcount))
+ if (!kref_get_unless_zero(&orig_neigh_node->refcount))
goto free;
/* Initialize nc_node */
INIT_LIST_HEAD(&nc_node->list);
ether_addr_copy(nc_node->addr, orig_node->orig);
nc_node->orig_node = orig_neigh_node;
- atomic_set(&nc_node->refcount, 2);
+ kref_init(&nc_node->refcount);
+ kref_get(&nc_node->refcount);
/* Select ingoing or outgoing coding node */
if (in_coding) {
@@ -923,9 +942,9 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
out:
if (in_nc_node)
- batadv_nc_node_free_ref(in_nc_node);
+ batadv_nc_node_put(in_nc_node);
if (out_nc_node)
- batadv_nc_node_free_ref(out_nc_node);
+ batadv_nc_node_put(out_nc_node);
}
/**
@@ -935,7 +954,7 @@ out:
* @src: ethernet source address - first half of the nc path search key
* @dst: ethernet destination address - second half of the nc path search key
*
- * Returns pointer to nc_path if the path was found or created, returns NULL
+ * Return: pointer to nc_path if the path was found or created, returns NULL
* on error.
*/
static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
@@ -966,7 +985,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
/* Initialize nc_path */
INIT_LIST_HEAD(&nc_path->packet_list);
spin_lock_init(&nc_path->packet_list_lock);
- atomic_set(&nc_path->refcount, 2);
+ kref_init(&nc_path->refcount);
+ kref_get(&nc_path->refcount);
nc_path->last_valid = jiffies;
ether_addr_copy(nc_path->next_hop, dst);
ether_addr_copy(nc_path->prev_hop, src);
@@ -992,6 +1012,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
* batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair
* selection of a receiver with slightly lower TQ than the other
* @tq: to be weighted tq value
+ *
+ * Return: scaled tq value
*/
static u8 batadv_nc_random_weight_tq(u8 tq)
{
@@ -1032,7 +1054,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
* @nc_packet: structure containing the packet to the skb can be coded with
* @neigh_node: next hop to forward packet to
*
- * Returns true if both packets are consumed, false otherwise.
+ * Return: true if both packets are consumed, false otherwise.
*/
static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
struct sk_buff *skb,
@@ -1206,13 +1228,13 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
res = true;
out:
if (router_neigh)
- batadv_neigh_node_free_ref(router_neigh);
+ batadv_neigh_node_put(router_neigh);
if (router_coding)
- batadv_neigh_node_free_ref(router_coding);
+ batadv_neigh_node_put(router_coding);
if (router_neigh_ifinfo)
- batadv_neigh_ifinfo_free_ref(router_neigh_ifinfo);
+ batadv_neigh_ifinfo_put(router_neigh_ifinfo);
if (router_coding_ifinfo)
- batadv_neigh_ifinfo_free_ref(router_coding_ifinfo);
+ batadv_neigh_ifinfo_put(router_coding_ifinfo);
return res;
}
@@ -1231,7 +1253,7 @@ out:
* Since the source encoded the packet we can be certain it has all necessary
* decode information.
*
- * Returns true if coding of a decoded packet is allowed.
+ * Return: true if coding of a decoded packet is allowed.
*/
static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
{
@@ -1249,7 +1271,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
* @skb: data skb to forward
* @eth_dst: next hop mac address of skb
*
- * Returns true if coding of a decoded skb is allowed.
+ * Return: true if coding of a decoded skb is allowed.
*/
static struct batadv_nc_packet *
batadv_nc_path_search(struct batadv_priv *bat_priv,
@@ -1317,7 +1339,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
* @eth_src: source mac address of skb
* @in_nc_node: pointer to skb next hop's neighbor nc node
*
- * Returns an nc packet if a suitable coding packet was found, NULL otherwise.
+ * Return: an nc packet if a suitable coding packet was found, NULL otherwise.
*/
static struct batadv_nc_packet *
batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
@@ -1350,7 +1372,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
}
rcu_read_unlock();
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return nc_packet;
}
@@ -1400,7 +1422,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
* next hop that potentially sent a packet which our next hop also received
* (overheard) and has stored for later decoding.
*
- * Returns true if the skb was consumed (encoded packet sent) or false otherwise
+ * Return: true if the skb was consumed (encoded packet sent) or false otherwise
*/
static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node,
@@ -1454,7 +1476,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
* @neigh_node: next hop to forward packet to
* @packet_id: checksum to identify packet
*
- * Returns true if the packet was buffered or false in case of an error.
+ * Return: true if the packet was buffered or false in case of an error.
*/
static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
struct batadv_nc_path *nc_path,
@@ -1488,7 +1510,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
* @skb: data skb to forward
* @neigh_node: next hop to forward packet to
*
- * Returns true if the skb was consumed (encoded packet sent) or false otherwise
+ * Return: true if the skb was consumed (encoded packet sent) or false otherwise
*/
bool batadv_nc_skb_forward(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node)
@@ -1533,7 +1555,7 @@ bool batadv_nc_skb_forward(struct sk_buff *skb,
return true;
free_nc_path:
- batadv_nc_path_free_ref(nc_path);
+ batadv_nc_path_put(nc_path);
out:
/* Packet is not consumed */
return false;
@@ -1595,7 +1617,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
free_skb:
kfree_skb(skb);
free_nc_path:
- batadv_nc_path_free_ref(nc_path);
+ batadv_nc_path_put(nc_path);
out:
return;
}
@@ -1627,7 +1649,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
* @skb: unicast skb to decode
* @nc_packet: decode data needed to decode the skb
*
- * Returns pointer to decoded unicast packet if the packet was decoded or NULL
+ * Return: pointer to decoded unicast packet if the packet was decoded or NULL
* in case of an error.
*/
static struct batadv_unicast_packet *
@@ -1721,7 +1743,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
* @ethhdr: pointer to the ethernet header inside the coded packet
* @coded: coded packet we try to find decode data for
*
- * Returns pointer to nc packet if the needed data was found or NULL otherwise.
+ * Return: pointer to nc packet if the needed data was found or NULL otherwise.
*/
static struct batadv_nc_packet *
batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
@@ -1784,6 +1806,9 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
* resulting unicast packet
* @skb: incoming coded packet
* @recv_if: pointer to interface this packet was received on
+ *
+ * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
+ * otherwise.
*/
static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
@@ -1868,6 +1893,8 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
* batadv_nc_nodes_seq_print_text - print the nc node information
* @seq: seq file to print on
* @offset: not used
+ *
+ * Return: always 0
*/
int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
{
@@ -1923,13 +1950,15 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return 0;
}
/**
* batadv_nc_init_debugfs - create nc folder and related files in debugfs
* @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
*/
int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
{
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 8f6d4ad8778a..d6d7fb4ec5d5 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 3c782a33bdac..e4cbb0753e37 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -18,11 +18,13 @@
#include "originator.h"
#include "main.h"
+#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -47,7 +49,13 @@ static struct lock_class_key batadv_orig_hash_lock_class_key;
static void batadv_purge_orig(struct work_struct *work);
-/* returns 1 if they are the same originator */
+/**
+ * batadv_compare_orig - comparing function used in the originator hash table
+ * @node: node in the local table
+ * @data2: second object to compare the node to
+ *
+ * Return: 1 if they are the same originator
+ */
int batadv_compare_orig(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_orig_node,
@@ -61,7 +69,7 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2)
* @orig_node: the originator serving the VLAN
* @vid: the VLAN identifier
*
- * Returns the vlan object identified by vid and belonging to orig_node or NULL
+ * Return: the vlan object identified by vid and belonging to orig_node or NULL
* if it does not exist.
*/
struct batadv_orig_node_vlan *
@@ -75,7 +83,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
if (tmp->vid != vid)
continue;
- if (!atomic_inc_not_zero(&tmp->refcount))
+ if (!kref_get_unless_zero(&tmp->refcount))
continue;
vlan = tmp;
@@ -93,7 +101,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
* @orig_node: the originator serving the VLAN
* @vid: the VLAN identifier
*
- * Returns NULL in case of failure or the vlan object identified by vid and
+ * Return: NULL in case of failure or the vlan object identified by vid and
* belonging to orig_node otherwise. The object is created and added to the list
* if it does not exist.
*
@@ -116,7 +124,8 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
if (!vlan)
goto out;
- atomic_set(&vlan->refcount, 2);
+ kref_init(&vlan->refcount);
+ kref_get(&vlan->refcount);
vlan->vid = vid;
hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list);
@@ -128,14 +137,27 @@ out:
}
/**
- * batadv_orig_node_vlan_free_ref - decrement the refcounter and possibly free
+ * batadv_orig_node_vlan_release - release originator-vlan object from lists
+ * and queue for free after rcu grace period
+ * @ref: kref pointer of the originator-vlan object
+ */
+static void batadv_orig_node_vlan_release(struct kref *ref)
+{
+ struct batadv_orig_node_vlan *orig_vlan;
+
+ orig_vlan = container_of(ref, struct batadv_orig_node_vlan, refcount);
+
+ kfree_rcu(orig_vlan, rcu);
+}
+
+/**
+ * batadv_orig_node_vlan_put - decrement the refcounter and possibly release
* the originator-vlan object
* @orig_vlan: the originator-vlan object to release
*/
-void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan)
+void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
{
- if (atomic_dec_and_test(&orig_vlan->refcount))
- kfree_rcu(orig_vlan, rcu);
+ kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
}
int batadv_originator_init(struct batadv_priv *bat_priv)
@@ -163,90 +185,68 @@ err:
}
/**
- * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object
- * @rcu: rcu pointer of the neigh_ifinfo object
+ * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the neigh_ifinfo
*/
-static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu)
+static void batadv_neigh_ifinfo_release(struct kref *ref)
{
struct batadv_neigh_ifinfo *neigh_ifinfo;
- neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu);
+ neigh_ifinfo = container_of(ref, struct batadv_neigh_ifinfo, refcount);
if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
- batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing);
+ batadv_hardif_put(neigh_ifinfo->if_outgoing);
- kfree(neigh_ifinfo);
+ kfree_rcu(neigh_ifinfo, rcu);
}
/**
- * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free
- * the neigh_ifinfo (without rcu callback)
- * @neigh_ifinfo: the neigh_ifinfo object to release
- */
-static void
-batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo)
-{
- if (atomic_dec_and_test(&neigh_ifinfo->refcount))
- batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu);
-}
-
-/**
- * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free
+ * batadv_neigh_ifinfo_put - decrement the refcounter and possibly release
* the neigh_ifinfo
* @neigh_ifinfo: the neigh_ifinfo object to release
*/
-void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo)
+void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
{
- if (atomic_dec_and_test(&neigh_ifinfo->refcount))
- call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu);
+ kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
}
/**
- * batadv_hardif_neigh_free_rcu - free the hardif neigh_node
- * @rcu: rcu pointer of the neigh_node
+ * batadv_hardif_neigh_release - release hardif neigh node from lists and
+ * queue for free after rcu grace period
+ * @ref: kref pointer of the neigh_node
*/
-static void batadv_hardif_neigh_free_rcu(struct rcu_head *rcu)
+static void batadv_hardif_neigh_release(struct kref *ref)
{
struct batadv_hardif_neigh_node *hardif_neigh;
- hardif_neigh = container_of(rcu, struct batadv_hardif_neigh_node, rcu);
+ hardif_neigh = container_of(ref, struct batadv_hardif_neigh_node,
+ refcount);
spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock);
hlist_del_init_rcu(&hardif_neigh->list);
spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock);
- batadv_hardif_free_ref_now(hardif_neigh->if_incoming);
- kfree(hardif_neigh);
+ batadv_hardif_put(hardif_neigh->if_incoming);
+ kfree_rcu(hardif_neigh, rcu);
}
/**
- * batadv_hardif_neigh_free_now - decrement the hardif neighbors refcounter
- * and possibly free it (without rcu callback)
+ * batadv_hardif_neigh_put - decrement the hardif neighbors refcounter
+ * and possibly release it
* @hardif_neigh: hardif neigh neighbor to free
*/
-static void
-batadv_hardif_neigh_free_now(struct batadv_hardif_neigh_node *hardif_neigh)
+void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
{
- if (atomic_dec_and_test(&hardif_neigh->refcount))
- batadv_hardif_neigh_free_rcu(&hardif_neigh->rcu);
+ kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
}
/**
- * batadv_hardif_neigh_free_ref - decrement the hardif neighbors refcounter
- * and possibly free it
- * @hardif_neigh: hardif neigh neighbor to free
+ * batadv_neigh_node_release - release neigh_node from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the neigh_node
*/
-void batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh)
-{
- if (atomic_dec_and_test(&hardif_neigh->refcount))
- call_rcu(&hardif_neigh->rcu, batadv_hardif_neigh_free_rcu);
-}
-
-/**
- * batadv_neigh_node_free_rcu - free the neigh_node
- * @rcu: rcu pointer of the neigh_node
- */
-static void batadv_neigh_node_free_rcu(struct rcu_head *rcu)
+static void batadv_neigh_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
@@ -254,51 +254,38 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu)
struct batadv_neigh_ifinfo *neigh_ifinfo;
struct batadv_algo_ops *bao;
- neigh_node = container_of(rcu, struct batadv_neigh_node, rcu);
+ neigh_node = container_of(ref, struct batadv_neigh_node, refcount);
bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
&neigh_node->ifinfo_list, list) {
- batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
}
hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming,
neigh_node->addr);
if (hardif_neigh) {
/* batadv_hardif_neigh_get() increases refcount too */
- batadv_hardif_neigh_free_now(hardif_neigh);
- batadv_hardif_neigh_free_now(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
}
if (bao->bat_neigh_free)
bao->bat_neigh_free(neigh_node);
- batadv_hardif_free_ref_now(neigh_node->if_incoming);
+ batadv_hardif_put(neigh_node->if_incoming);
- kfree(neigh_node);
-}
-
-/**
- * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter
- * and possibly free it (without rcu callback)
- * @neigh_node: neigh neighbor to free
- */
-static void
-batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node)
-{
- if (atomic_dec_and_test(&neigh_node->refcount))
- batadv_neigh_node_free_rcu(&neigh_node->rcu);
+ kfree_rcu(neigh_node, rcu);
}
/**
- * batadv_neigh_node_free_ref - decrement the neighbors refcounter
- * and possibly free it
+ * batadv_neigh_node_put - decrement the neighbors refcounter and possibly
+ * release it
* @neigh_node: neigh neighbor to free
*/
-void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
+void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
{
- if (atomic_dec_and_test(&neigh_node->refcount))
- call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu);
+ kref_put(&neigh_node->refcount, batadv_neigh_node_release);
}
/**
@@ -307,7 +294,7 @@ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
* @if_outgoing: the interface where the payload packet has been received or
* the OGM should be sent to
*
- * Returns the neighbor which should be router for this orig_node/iface.
+ * Return: the neighbor which should be router for this orig_node/iface.
*
* The object is returned with refcounter increased by 1.
*/
@@ -327,7 +314,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
break;
}
- if (router && !atomic_inc_not_zero(&router->refcount))
+ if (router && !kref_get_unless_zero(&router->refcount))
router = NULL;
rcu_read_unlock();
@@ -339,7 +326,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
* @orig_node: the orig node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
*
- * Returns the requested orig_ifinfo or NULL if not found.
+ * Return: the requested orig_ifinfo or NULL if not found.
*
* The object is returned with refcounter increased by 1.
*/
@@ -355,7 +342,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
if (tmp->if_outgoing != if_outgoing)
continue;
- if (!atomic_inc_not_zero(&tmp->refcount))
+ if (!kref_get_unless_zero(&tmp->refcount))
continue;
orig_ifinfo = tmp;
@@ -371,7 +358,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
* @orig_node: the orig node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
*
- * Returns NULL in case of failure or the orig_ifinfo object for the if_outgoing
+ * Return: NULL in case of failure or the orig_ifinfo object for the if_outgoing
* interface otherwise. The object is created and added to the list
* if it does not exist.
*
@@ -395,7 +382,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
goto out;
if (if_outgoing != BATADV_IF_DEFAULT &&
- !atomic_inc_not_zero(&if_outgoing->refcount)) {
+ !kref_get_unless_zero(&if_outgoing->refcount)) {
kfree(orig_ifinfo);
orig_ifinfo = NULL;
goto out;
@@ -406,7 +393,8 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
orig_ifinfo->batman_seqno_reset = reset_time;
orig_ifinfo->if_outgoing = if_outgoing;
INIT_HLIST_NODE(&orig_ifinfo->list);
- atomic_set(&orig_ifinfo->refcount, 2);
+ kref_init(&orig_ifinfo->refcount);
+ kref_get(&orig_ifinfo->refcount);
hlist_add_head_rcu(&orig_ifinfo->list,
&orig_node->ifinfo_list);
out:
@@ -416,12 +404,12 @@ out:
/**
* batadv_neigh_ifinfo_get - find the ifinfo from an neigh_node
- * @neigh_node: the neigh node to be queried
+ * @neigh: the neigh node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
*
* The object is returned with refcounter increased by 1.
*
- * Returns the requested neigh_ifinfo or NULL if not found
+ * Return: the requested neigh_ifinfo or NULL if not found
*/
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
@@ -436,7 +424,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
if (tmp_neigh_ifinfo->if_outgoing != if_outgoing)
continue;
- if (!atomic_inc_not_zero(&tmp_neigh_ifinfo->refcount))
+ if (!kref_get_unless_zero(&tmp_neigh_ifinfo->refcount))
continue;
neigh_ifinfo = tmp_neigh_ifinfo;
@@ -449,10 +437,10 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
/**
* batadv_neigh_ifinfo_new - search and possibly create an neigh_ifinfo object
- * @neigh_node: the neigh node to be queried
+ * @neigh: the neigh node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
*
- * Returns NULL in case of failure or the neigh_ifinfo object for the
+ * Return: NULL in case of failure or the neigh_ifinfo object for the
* if_outgoing interface otherwise. The object is created and added to the list
* if it does not exist.
*
@@ -474,14 +462,15 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
if (!neigh_ifinfo)
goto out;
- if (if_outgoing && !atomic_inc_not_zero(&if_outgoing->refcount)) {
+ if (if_outgoing && !kref_get_unless_zero(&if_outgoing->refcount)) {
kfree(neigh_ifinfo);
neigh_ifinfo = NULL;
goto out;
}
INIT_HLIST_NODE(&neigh_ifinfo->list);
- atomic_set(&neigh_ifinfo->refcount, 2);
+ kref_init(&neigh_ifinfo->refcount);
+ kref_get(&neigh_ifinfo->refcount);
neigh_ifinfo->if_outgoing = if_outgoing;
hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list);
@@ -500,7 +489,8 @@ out:
*
* Looks for and possibly returns a neighbour belonging to this originator list
* which is connected through the provided hard interface.
- * Returns NULL if the neighbour is not found.
+ *
+ * Return: neighbor when found. Othwerwise NULL
*/
static struct batadv_neigh_node *
batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
@@ -517,7 +507,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
if (tmp_neigh_node->if_incoming != hard_iface)
continue;
- if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+ if (!kref_get_unless_zero(&tmp_neigh_node->refcount))
continue;
res = tmp_neigh_node;
@@ -533,7 +523,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
* @hard_iface: the interface this neighbour is connected to
* @neigh_addr: the interface address of the neighbour to retrieve
*
- * Returns the hardif neighbour node if found or created or NULL otherwise.
+ * Return: the hardif neighbour node if found or created or NULL otherwise.
*/
static struct batadv_hardif_neigh_node *
batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
@@ -549,12 +539,12 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
if (hardif_neigh)
goto out;
- if (!atomic_inc_not_zero(&hard_iface->refcount))
+ if (!kref_get_unless_zero(&hard_iface->refcount))
goto out;
hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC);
if (!hardif_neigh) {
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
goto out;
}
@@ -563,7 +553,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
hardif_neigh->if_incoming = hard_iface;
hardif_neigh->last_seen = jiffies;
- atomic_set(&hardif_neigh->refcount, 1);
+ kref_init(&hardif_neigh->refcount);
if (bat_priv->bat_algo_ops->bat_hardif_neigh_init)
bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh);
@@ -581,7 +571,7 @@ out:
* @hard_iface: the interface this neighbour is connected to
* @neigh_addr: the interface address of the neighbour to retrieve
*
- * Returns the hardif neighbour node if found or created or NULL otherwise.
+ * Return: the hardif neighbour node if found or created or NULL otherwise.
*/
static struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
@@ -603,7 +593,8 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
* @neigh_addr: the address of the neighbour
*
* Looks for and possibly returns a neighbour belonging to this hard interface.
- * Returns NULL if the neighbour is not found.
+ *
+ * Return: neighbor when found. Othwerwise NULL
*/
struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
@@ -617,7 +608,7 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr))
continue;
- if (!atomic_inc_not_zero(&tmp_hardif_neigh->refcount))
+ if (!kref_get_unless_zero(&tmp_hardif_neigh->refcount))
continue;
hardif_neigh = tmp_hardif_neigh;
@@ -635,7 +626,8 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
* @neigh_addr: the mac address of the neighbour interface
*
* Allocates a new neigh_node object and initialises all the generic fields.
- * Returns the new object or NULL on failure.
+ *
+ * Return: neighbor when found. Othwerwise NULL
*/
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_orig_node *orig_node,
@@ -658,7 +650,7 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
if (!neigh_node)
goto out;
- if (!atomic_inc_not_zero(&hard_iface->refcount)) {
+ if (!kref_get_unless_zero(&hard_iface->refcount)) {
kfree(neigh_node);
neigh_node = NULL;
goto out;
@@ -673,14 +665,15 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
neigh_node->orig_node = orig_node;
/* extra reference for return */
- atomic_set(&neigh_node->refcount, 2);
+ kref_init(&neigh_node->refcount);
+ kref_get(&neigh_node->refcount);
spin_lock_bh(&orig_node->neigh_list_lock);
hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
spin_unlock_bh(&orig_node->neigh_list_lock);
/* increment unique neighbor refcount */
- atomic_inc(&hardif_neigh->refcount);
+ kref_get(&hardif_neigh->refcount);
batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
"Creating new neighbor %pM for orig_node %pM on interface %s\n",
@@ -688,7 +681,7 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
out:
if (hardif_neigh)
- batadv_hardif_neigh_free_ref(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
return neigh_node;
}
@@ -697,7 +690,7 @@ out:
* @seq: neighbour table seq_file struct
* @offset: not used
*
- * Always returns 0.
+ * Return: always 0
*/
int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
{
@@ -714,7 +707,7 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
primary_if->net_dev->dev_addr, net_dev->name,
bat_priv->bat_algo_ops->name);
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (!bat_priv->bat_algo_ops->bat_neigh_print) {
seq_puts(seq,
@@ -727,57 +720,72 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
}
/**
- * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
- * @rcu: rcu pointer of the orig_ifinfo object
+ * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the orig_ifinfo
*/
-static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu)
+static void batadv_orig_ifinfo_release(struct kref *ref)
{
struct batadv_orig_ifinfo *orig_ifinfo;
struct batadv_neigh_node *router;
- orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu);
+ orig_ifinfo = container_of(ref, struct batadv_orig_ifinfo, refcount);
if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
- batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing);
+ batadv_hardif_put(orig_ifinfo->if_outgoing);
/* this is the last reference to this object */
router = rcu_dereference_protected(orig_ifinfo->router, true);
if (router)
- batadv_neigh_node_free_ref_now(router);
- kfree(orig_ifinfo);
+ batadv_neigh_node_put(router);
+
+ kfree_rcu(orig_ifinfo, rcu);
}
/**
- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
- * the orig_ifinfo (without rcu callback)
+ * batadv_orig_ifinfo_put - decrement the refcounter and possibly release
+ * the orig_ifinfo
* @orig_ifinfo: the orig_ifinfo object to release
*/
-static void
-batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo)
+void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
{
- if (atomic_dec_and_test(&orig_ifinfo->refcount))
- batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu);
+ kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
}
/**
- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
- * the orig_ifinfo
- * @orig_ifinfo: the orig_ifinfo object to release
+ * batadv_orig_node_free_rcu - free the orig_node
+ * @rcu: rcu pointer of the orig_node
*/
-void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
+static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
{
- if (atomic_dec_and_test(&orig_ifinfo->refcount))
- call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu);
+ struct batadv_orig_node *orig_node;
+
+ orig_node = container_of(rcu, struct batadv_orig_node, rcu);
+
+ batadv_mcast_purge_orig(orig_node);
+
+ batadv_frag_purge_orig(orig_node, NULL);
+
+ if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
+ orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+
+ kfree(orig_node->tt_buff);
+ kfree(orig_node);
}
-static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
+/**
+ * batadv_orig_node_release - release orig_node from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the orig_node
+ */
+static void batadv_orig_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
struct batadv_orig_node *orig_node;
struct batadv_orig_ifinfo *orig_ifinfo;
- orig_node = container_of(rcu, struct batadv_orig_node, rcu);
+ orig_node = container_of(ref, struct batadv_orig_node, refcount);
spin_lock_bh(&orig_node->neigh_list_lock);
@@ -785,50 +793,30 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
hlist_for_each_entry_safe(neigh_node, node_tmp,
&orig_node->neigh_list, list) {
hlist_del_rcu(&neigh_node->list);
- batadv_neigh_node_free_ref_now(neigh_node);
+ batadv_neigh_node_put(neigh_node);
}
hlist_for_each_entry_safe(orig_ifinfo, node_tmp,
&orig_node->ifinfo_list, list) {
hlist_del_rcu(&orig_ifinfo->list);
- batadv_orig_ifinfo_free_ref_now(orig_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
}
spin_unlock_bh(&orig_node->neigh_list_lock);
- batadv_mcast_purge_orig(orig_node);
-
/* Free nc_nodes */
batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
- batadv_frag_purge_orig(orig_node, NULL);
-
- if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
- orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
-
- kfree(orig_node->tt_buff);
- kfree(orig_node);
+ call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
}
/**
- * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly
- * schedule an rcu callback for freeing it
+ * batadv_orig_node_put - decrement the orig node refcounter and possibly
+ * release it
* @orig_node: the orig node to free
*/
-void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node)
+void batadv_orig_node_put(struct batadv_orig_node *orig_node)
{
- if (atomic_dec_and_test(&orig_node->refcount))
- call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
-}
-
-/**
- * batadv_orig_node_free_ref_now - decrement the orig node refcounter and
- * possibly free it (without rcu callback)
- * @orig_node: the orig node to free
- */
-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node)
-{
- if (atomic_dec_and_test(&orig_node->refcount))
- batadv_orig_node_free_rcu(&orig_node->rcu);
+ kref_put(&orig_node->refcount, batadv_orig_node_release);
}
void batadv_originator_free(struct batadv_priv *bat_priv)
@@ -855,7 +843,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
hlist_for_each_entry_safe(orig_node, node_tmp,
head, hash_entry) {
hlist_del_rcu(&orig_node->hash_entry);
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
}
spin_unlock_bh(list_lock);
}
@@ -870,7 +858,8 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
*
* Creates a new originator object and initialise all the generic fields.
* The new object is not added to the originator list.
- * Returns the newly created object or NULL on failure.
+ *
+ * Return: the newly created object or NULL on failure.
*/
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const u8 *addr)
@@ -899,7 +888,8 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
batadv_nc_init_orig(orig_node);
/* extra reference for return */
- atomic_set(&orig_node->refcount, 2);
+ kref_init(&orig_node->refcount);
+ kref_get(&orig_node->refcount);
orig_node->bat_priv = bat_priv;
ether_addr_copy(orig_node->orig, addr);
@@ -927,7 +917,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
* Immediately release vlan since it is not needed anymore in this
* context
*/
- batadv_orig_node_vlan_free_ref(vlan);
+ batadv_orig_node_vlan_put(vlan);
for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
INIT_HLIST_HEAD(&orig_node->fragments[i].head);
@@ -976,7 +966,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
neigh->addr, if_outgoing->net_dev->name);
hlist_del_rcu(&neigh_ifinfo->list);
- batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
}
spin_unlock_bh(&neigh->ifinfo_lock);
@@ -987,7 +977,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be checked
*
- * Returns true if any ifinfo entry was purged, false otherwise.
+ * Return: true if any ifinfo entry was purged, false otherwise.
*/
static bool
batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
@@ -1022,10 +1012,10 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
ifinfo_purged = true;
hlist_del_rcu(&orig_ifinfo->list);
- batadv_orig_ifinfo_free_ref(orig_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
if (orig_node->last_bonding_candidate == orig_ifinfo) {
orig_node->last_bonding_candidate = NULL;
- batadv_orig_ifinfo_free_ref(orig_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
}
}
@@ -1039,7 +1029,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be checked
*
- * Returns true if any neighbor was purged, false otherwise
+ * Return: true if any neighbor was purged, false otherwise
*/
static bool
batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
@@ -1079,7 +1069,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
neigh_purged = true;
hlist_del_rcu(&neigh_node->list);
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
} else {
/* only necessary if not the whole neighbor is to be
* deleted, but some interface has been removed.
@@ -1098,7 +1088,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
* @orig_node: orig node which is to be checked
* @if_outgoing: the interface for which the metric should be compared
*
- * Returns the current best neighbor, with refcount increased.
+ * Return: the current best neighbor, with refcount increased.
*/
static struct batadv_neigh_node *
batadv_find_best_neighbor(struct batadv_priv *bat_priv,
@@ -1114,11 +1104,11 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
best, if_outgoing) <= 0))
continue;
- if (!atomic_inc_not_zero(&neigh->refcount))
+ if (!kref_get_unless_zero(&neigh->refcount))
continue;
if (best)
- batadv_neigh_node_free_ref(best);
+ batadv_neigh_node_put(best);
best = neigh;
}
@@ -1135,7 +1125,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
* This function checks if the orig_node or substructures of it have become
* obsolete, and purges this information if that's the case.
*
- * Returns true if the orig_node is to be removed, false otherwise.
+ * Return: true if the orig_node is to be removed, false otherwise.
*/
static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
@@ -1164,7 +1154,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT,
best_neigh_node);
if (best_neigh_node)
- batadv_neigh_node_free_ref(best_neigh_node);
+ batadv_neigh_node_put(best_neigh_node);
/* ... then for all other interfaces. */
rcu_read_lock();
@@ -1181,7 +1171,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
batadv_update_route(bat_priv, orig_node, hard_iface,
best_neigh_node);
if (best_neigh_node)
- batadv_neigh_node_free_ref(best_neigh_node);
+ batadv_neigh_node_put(best_neigh_node);
}
rcu_read_unlock();
@@ -1214,7 +1204,7 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv)
batadv_tt_global_del_orig(orig_node->bat_priv,
orig_node, -1,
"originator timed out");
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
continue;
}
@@ -1260,7 +1250,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
primary_if->net_dev->dev_addr, net_dev->name,
bat_priv->bat_algo_ops->name);
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (!bat_priv->bat_algo_ops->bat_orig_print) {
seq_puts(seq,
@@ -1280,7 +1270,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
* @seq: debugfs table seq_file struct
* @offset: not used
*
- * Returns 0
+ * Return: 0
*/
int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
{
@@ -1316,7 +1306,7 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
out:
if (hard_iface)
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
return 0;
}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 29557753d552..4e8b67f11051 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -20,10 +20,10 @@
#include "main.h"
-#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/if_ether.h>
#include <linux/jhash.h>
+#include <linux/kref.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/stddef.h>
@@ -37,20 +37,19 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2);
int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
-void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
+void batadv_orig_node_put(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const u8 *addr);
struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr);
void
-batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh);
+batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr);
-void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node);
+void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *if_outgoing);
@@ -60,7 +59,7 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
-void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo);
+void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
@@ -70,7 +69,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
struct batadv_orig_ifinfo *
batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *if_outgoing);
-void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo);
+void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
@@ -84,7 +83,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
struct batadv_orig_node_vlan *
batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
unsigned short vid);
-void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan);
+void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
/* hashfunction to choose an entry in a hash table of given size
* hash algorithm from http://en.wikipedia.org/wiki/Hash_table
@@ -116,7 +115,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data)
if (!batadv_compare_eth(orig_node, data))
continue;
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!kref_get_unless_zero(&orig_node->refcount))
continue;
orig_node_tmp = orig_node;
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 0558e3237e0e..e7f915181aba 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -158,7 +158,7 @@ enum batadv_tt_client_flags {
};
/**
- * batadv_vlan_flags - flags for the four MSB of any vlan ID field
+ * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
* @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
*/
enum batadv_vlan_flags {
@@ -209,6 +209,11 @@ struct batadv_bla_claim_dst {
* @version: batman-adv protocol version, part of the genereal header
* @ttl: time to live for this packet, part of the genereal header
* @flags: contains routing relevant flags - see enum batadv_iv_flags
+ * @seqno: sequence identification
+ * @orig: address of the source node
+ * @prev_sender: address of the previous sender
+ * @reserved: reserved byte for alignment
+ * @tq: transmission quality
* @tvlv_len: length of tvlv data following the ogm header
*/
struct batadv_ogm_packet {
@@ -230,7 +235,7 @@ struct batadv_ogm_packet {
#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
/**
- * batadv_icmp_header - common members among all the ICMP packets
+ * struct batadv_icmp_header - common members among all the ICMP packets
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the genereal header
* @ttl: time to live for this packet, part of the genereal header
@@ -256,7 +261,7 @@ struct batadv_icmp_header {
};
/**
- * batadv_icmp_packet - ICMP packet
+ * struct batadv_icmp_packet - ICMP packet
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the genereal header
* @ttl: time to live for this packet, part of the genereal header
@@ -282,7 +287,7 @@ struct batadv_icmp_packet {
#define BATADV_RR_LEN 16
/**
- * batadv_icmp_packet_rr - ICMP RouteRecord packet
+ * struct batadv_icmp_packet_rr - ICMP RouteRecord packet
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the genereal header
* @ttl: time to live for this packet, part of the genereal header
@@ -345,6 +350,7 @@ struct batadv_unicast_packet {
* @u: common unicast packet header
* @src: address of the source
* @subtype: packet subtype
+ * @reserved: reserved byte for alignment
*/
struct batadv_unicast_4addr_packet {
struct batadv_unicast_packet u;
@@ -413,7 +419,6 @@ struct batadv_bcast_packet {
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the genereal header
* @ttl: time to live for this packet, part of the genereal header
- * @reserved: Align following fields to 2-byte boundaries
* @first_source: original source of first included packet
* @first_orig_dest: original destinal of first included packet
* @first_crc: checksum of first included packet
@@ -495,7 +500,7 @@ struct batadv_tvlv_gateway_data {
* struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
* @flags: translation table flags (see batadv_tt_data_flags)
* @ttvn: translation table version number
- * @vlan_num: number of announced VLANs. In the TVLV this struct is followed by
+ * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by
* one batadv_tvlv_tt_vlan_data object per announced vlan
*/
struct batadv_tvlv_tt_data {
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e4f2646d9246..4dd646a52f1a 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -25,6 +25,7 @@
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
#include <linux/rculist.h>
@@ -72,7 +73,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
rcu_read_lock();
curr_router = rcu_dereference(orig_ifinfo->router);
- if (curr_router && !atomic_inc_not_zero(&curr_router->refcount))
+ if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
curr_router = NULL;
rcu_read_unlock();
@@ -97,20 +98,20 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
}
if (curr_router)
- batadv_neigh_node_free_ref(curr_router);
+ batadv_neigh_node_put(curr_router);
/* increase refcount of new best neighbor */
- if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount))
+ if (neigh_node && !kref_get_unless_zero(&neigh_node->refcount))
neigh_node = NULL;
spin_lock_bh(&orig_node->neigh_list_lock);
rcu_assign_pointer(orig_ifinfo->router, neigh_node);
spin_unlock_bh(&orig_node->neigh_list_lock);
- batadv_orig_ifinfo_free_ref(orig_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
/* decrease refcount of previous best neighbor */
if (curr_router)
- batadv_neigh_node_free_ref(curr_router);
+ batadv_neigh_node_put(curr_router);
}
/**
@@ -137,24 +138,38 @@ void batadv_update_route(struct batadv_priv *bat_priv,
out:
if (router)
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
}
-/* checks whether the host restarted and is in the protection time.
- * returns:
- * 0 if the packet is to be accepted
+/**
+ * batadv_window_protected - checks whether the host restarted and is in the
+ * protection time.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq_num_diff: difference between the current/received sequence number and
+ * the last sequence number
+ * @seq_old_max_diff: maximum age of sequence number not considered as restart
+ * @last_reset: jiffies timestamp of the last reset, will be updated when reset
+ * is detected
+ * @protection_started: is set to true if the protection window was started,
+ * doesn't change otherwise.
+ *
+ * Return:
+ * 0 if the packet is to be accepted.
* 1 if the packet is to be ignored.
*/
int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
- unsigned long *last_reset)
+ s32 seq_old_max_diff, unsigned long *last_reset,
+ bool *protection_started)
{
- if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE ||
+ if (seq_num_diff <= -seq_old_max_diff ||
seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) {
if (!batadv_has_timed_out(*last_reset,
BATADV_RESET_PROTECTION_MS))
return 1;
*last_reset = jiffies;
+ if (protection_started)
+ *protection_started = true;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"old packet received, start protection\n");
}
@@ -198,7 +213,7 @@ bool batadv_check_management_packet(struct sk_buff *skb,
* @bat_priv: the bat priv with all the soft interface information
* @skb: icmp packet to process
*
- * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
+ * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
* otherwise.
*/
static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
@@ -254,9 +269,9 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
}
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -302,9 +317,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -388,7 +403,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
out:
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -398,10 +413,11 @@ out:
* @skb: packet to check
* @hdr_size: size of header to pull
*
- * Check for short header and bad addresses in given packet. Returns negative
- * value when check fails and 0 otherwise. The negative value depends on the
- * reason: -ENODATA for bad header, -EBADR for broadcast destination or source,
- * and -EREMOTE for non-local (other host) destination.
+ * Check for short header and bad addresses in given packet.
+ *
+ * Return: negative value when check fails and 0 otherwise. The negative value
+ * depends on the reason: -ENODATA for bad header, -EBADR for broadcast
+ * destination or source, and -EREMOTE for non-local (other host) destination.
*/
static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
@@ -435,7 +451,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
* @orig_node: the destination node
* @recv_if: pointer to interface this packet was received on
*
- * Returns the router which should be used for this orig_node on
+ * Return: the router which should be used for this orig_node on
* this interface, or NULL if not available.
*/
struct batadv_neigh_node *
@@ -482,14 +498,14 @@ batadv_find_router(struct batadv_priv *bat_priv,
hlist_for_each_entry_rcu(cand, &orig_node->ifinfo_list, list) {
/* acquire some structures and references ... */
- if (!atomic_inc_not_zero(&cand->refcount))
+ if (!kref_get_unless_zero(&cand->refcount))
continue;
cand_router = rcu_dereference(cand->router);
if (!cand_router)
goto next;
- if (!atomic_inc_not_zero(&cand_router->refcount)) {
+ if (!kref_get_unless_zero(&cand_router->refcount)) {
cand_router = NULL;
goto next;
}
@@ -508,8 +524,8 @@ batadv_find_router(struct batadv_priv *bat_priv,
/* mark the first possible candidate */
if (!first_candidate) {
- atomic_inc(&cand_router->refcount);
- atomic_inc(&cand->refcount);
+ kref_get(&cand_router->refcount);
+ kref_get(&cand->refcount);
first_candidate = cand;
first_candidate_router = cand_router;
}
@@ -529,16 +545,16 @@ batadv_find_router(struct batadv_priv *bat_priv,
next:
/* free references */
if (cand_router) {
- batadv_neigh_node_free_ref(cand_router);
+ batadv_neigh_node_put(cand_router);
cand_router = NULL;
}
- batadv_orig_ifinfo_free_ref(cand);
+ batadv_orig_ifinfo_put(cand);
}
rcu_read_unlock();
/* last_bonding_candidate is reset below, remove the old reference. */
if (orig_node->last_bonding_candidate)
- batadv_orig_ifinfo_free_ref(orig_node->last_bonding_candidate);
+ batadv_orig_ifinfo_put(orig_node->last_bonding_candidate);
/* After finding candidates, handle the three cases:
* 1) there is a next candidate, use that
@@ -546,17 +562,17 @@ next:
* 3) there is no candidate at all, return the default router
*/
if (next_candidate) {
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
/* remove references to first candidate, we don't need it. */
if (first_candidate) {
- batadv_neigh_node_free_ref(first_candidate_router);
- batadv_orig_ifinfo_free_ref(first_candidate);
+ batadv_neigh_node_put(first_candidate_router);
+ batadv_orig_ifinfo_put(first_candidate);
}
router = next_candidate_router;
orig_node->last_bonding_candidate = next_candidate;
} else if (first_candidate) {
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
/* refcounting has already been done in the loop above. */
router = first_candidate_router;
@@ -633,7 +649,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
out:
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -648,7 +664,7 @@ out:
* the new corresponding information (originator address where the destination
* client currently is and its known TTVN)
*
- * Returns true if the packet header has been updated, false otherwise
+ * Return: true if the packet header has been updated, false otherwise
*/
static bool
batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
@@ -686,9 +702,9 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
ret = true;
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -752,7 +768,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
return 0;
curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn);
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
}
/* check if the TTVN contained in the packet is fresher than what the
@@ -792,7 +808,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
unicast_packet->ttvn = curr_ttvn;
@@ -805,7 +821,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* @skb: unicast tvlv packet to process
* @recv_if: pointer to interface this packet was received on
*
- * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
+ * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
* otherwise.
*/
int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
@@ -892,7 +908,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
rx_success:
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return NET_RX_SUCCESS;
}
@@ -904,9 +920,8 @@ rx_success:
* batadv_recv_unicast_tvlv - receive and process unicast tvlv packets
* @skb: unicast tvlv packet to process
* @recv_if: pointer to interface this packet was received on
- * @dst_addr: the payload destination
*
- * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
+ * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
* otherwise.
*/
int batadv_recv_unicast_tvlv(struct sk_buff *skb,
@@ -960,7 +975,7 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
* the assembled packet will exceed our MTU; 2) Buffer fragment, if we till
* lack further fragments; 3) Merge fragments, if we have all needed parts.
*
- * Return NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
+ * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
*/
int batadv_recv_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
@@ -1004,7 +1019,7 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
out:
if (orig_node_src)
- batadv_orig_node_free_ref(orig_node_src);
+ batadv_orig_node_put(orig_node_src);
return ret;
}
@@ -1065,7 +1080,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
/* check whether the packet is old and the host just restarted. */
if (batadv_window_protected(bat_priv, seq_diff,
- &orig_node->bcast_seqno_reset))
+ BATADV_BCAST_MAX_AGE,
+ &orig_node->bcast_seqno_reset, NULL))
goto spin_unlock;
/* mark broadcast in flood history, update window position
@@ -1108,6 +1124,6 @@ spin_unlock:
spin_unlock_bh(&orig_node->bcast_seqno_lock);
out:
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 204bbe4952a6..02a5caa84127 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -52,6 +52,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_hard_iface *recv_if);
int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
- unsigned long *last_reset);
+ s32 seq_old_max_diff, unsigned long *last_reset,
+ bool *protection_started);
#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 782fa33ec296..caff32cf6fe7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -111,7 +111,7 @@ send_skb_err:
* host, NULL can be passed as recv_if and no interface alternating is
* attempted.
*
- * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
+ * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
* NET_XMIT_POLICED if the skb is buffered for later transmit.
*/
int batadv_send_skb_to_orig(struct sk_buff *skb,
@@ -153,7 +153,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
out:
if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
+ batadv_neigh_node_put(neigh_node);
return ret;
}
@@ -165,7 +165,7 @@ out:
* @hdr_size: amount of bytes to push at the beginning of the skb
* @orig_node: the destination node
*
- * Returns false if the buffer extension was not possible or true otherwise.
+ * Return: false if the buffer extension was not possible or true otherwise.
*/
static bool
batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
@@ -196,7 +196,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
* @skb: the skb containing the payload to encapsulate
* @orig_node: the destination node
*
- * Returns false if the payload could not be encapsulated or true otherwise.
+ * Return: false if the payload could not be encapsulated or true otherwise.
*/
static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
struct batadv_orig_node *orig_node)
@@ -211,10 +211,10 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
* unicast 4addr header
* @bat_priv: the bat priv with all the soft interface information
* @skb: the skb containing the payload to encapsulate
- * @orig_node: the destination node
+ * @orig: the destination node
* @packet_subtype: the unicast 4addr packet subtype to use
*
- * Returns false if the payload could not be encapsulated or true otherwise.
+ * Return: false if the payload could not be encapsulated or true otherwise.
*/
bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
struct sk_buff *skb,
@@ -246,7 +246,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
ret = true;
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return ret;
}
@@ -265,7 +265,7 @@ out:
* as packet_type. Then send this frame to the given orig_node and release a
* reference to this orig_node.
*
- * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
struct sk_buff *skb, int packet_type,
@@ -317,7 +317,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
out:
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
if (ret == NET_XMIT_DROP)
kfree_skb(skb);
return ret;
@@ -339,7 +339,7 @@ out:
* BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame
* to the according destination node.
*
- * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
struct sk_buff *skb, int packet_type,
@@ -373,7 +373,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
* Look up the currently selected gateway. Wrap the given skb into a batman-adv
* unicast header and send this frame to this gateway node.
*
- * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid)
@@ -409,9 +409,9 @@ static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
{
kfree_skb(forw_packet->skb);
if (forw_packet->if_incoming)
- batadv_hardif_free_ref(forw_packet->if_incoming);
+ batadv_hardif_put(forw_packet->if_incoming);
if (forw_packet->if_outgoing)
- batadv_hardif_free_ref(forw_packet->if_outgoing);
+ batadv_hardif_put(forw_packet->if_outgoing);
kfree(forw_packet);
}
@@ -430,14 +430,19 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
send_time);
}
-/* add a broadcast packet to the queue and setup timers. broadcast packets
- * are sent multiple times to increase probability for being received.
+/**
+ * batadv_add_bcast_packet_to_list - queue broadcast packet for multiple sends
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
*
- * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on
- * errors.
+ * add a broadcast packet to the queue and setup timers. broadcast packets
+ * are sent multiple times to increase probability for being received.
*
* The skb is not consumed, so the caller should make sure that the
* skb is freed.
+ *
+ * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
*/
int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
const struct sk_buff *skb,
@@ -492,7 +497,7 @@ out_and_inc:
atomic_inc(&bat_priv->bcast_queue_left);
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return NETDEV_TX_BUSY;
}
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 82059f259e46..7ff95cada2e7 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -69,7 +69,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
* header via the translation table. Wrap the given skb into a batman-adv
* unicast header. Then send this frame to the according destination node.
*
- * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
struct sk_buff *skb, u8 *dst_hint,
@@ -92,7 +92,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
* unicast-4addr header. Then send this frame to the according destination
* node.
*
- * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv,
struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index ac4d08de5df4..0710379491bf 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -30,6 +30,7 @@
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -376,7 +377,7 @@ dropped_freed:
batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
end:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return NETDEV_TX_OK;
}
@@ -478,22 +479,34 @@ out:
}
/**
- * batadv_softif_vlan_free_ref - decrease the vlan object refcounter and
- * possibly free it
- * @softif_vlan: the vlan object to release
+ * batadv_softif_vlan_release - release vlan from lists and queue for free after
+ * rcu grace period
+ * @ref: kref pointer of the vlan object
*/
-void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan)
+static void batadv_softif_vlan_release(struct kref *ref)
+{
+ struct batadv_softif_vlan *vlan;
+
+ vlan = container_of(ref, struct batadv_softif_vlan, refcount);
+
+ spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
+ hlist_del_rcu(&vlan->list);
+ spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
+
+ kfree_rcu(vlan, rcu);
+}
+
+/**
+ * batadv_softif_vlan_put - decrease the vlan object refcounter and
+ * possibly release it
+ * @vlan: the vlan object to release
+ */
+void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
{
if (!vlan)
return;
- if (atomic_dec_and_test(&vlan->refcount)) {
- spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
- hlist_del_rcu(&vlan->list);
- spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
-
- kfree_rcu(vlan, rcu);
- }
+ kref_put(&vlan->refcount, batadv_softif_vlan_release);
}
/**
@@ -501,7 +514,7 @@ void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan)
* @bat_priv: the bat priv with all the soft interface information
* @vid: the identifier of the vlan object to retrieve
*
- * Returns the private data of the vlan matching the vid passed as argument or
+ * Return: the private data of the vlan matching the vid passed as argument or
* NULL otherwise. The refcounter of the returned object is incremented by 1.
*/
struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
@@ -514,7 +527,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
if (vlan_tmp->vid != vid)
continue;
- if (!atomic_inc_not_zero(&vlan_tmp->refcount))
+ if (!kref_get_unless_zero(&vlan_tmp->refcount))
continue;
vlan = vlan_tmp;
@@ -530,7 +543,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @vid: the VLAN identifier
*
- * Returns 0 on success, a negative error otherwise.
+ * Return: 0 on success, a negative error otherwise.
*/
int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
{
@@ -539,7 +552,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
vlan = batadv_softif_vlan_get(bat_priv, vid);
if (vlan) {
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
return -EEXIST;
}
@@ -549,7 +562,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
vlan->bat_priv = bat_priv;
vlan->vid = vid;
- atomic_set(&vlan->refcount, 1);
+ kref_init(&vlan->refcount);
atomic_set(&vlan->ap_isolation, 0);
@@ -588,18 +601,19 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
vlan->vid, "vlan interface destroyed", false);
batadv_sysfs_del_vlan(bat_priv, vlan);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
}
/**
* batadv_interface_add_vid - ndo_add_vid API implementation
* @dev: the netdev of the mesh interface
+ * @proto: protocol of the the vlan id
* @vid: identifier of the new vlan
*
* Set up all the internal structures for handling the new vlan on top of the
* mesh interface
*
- * Returns 0 on success or a negative error code in case of failure.
+ * Return: 0 on success or a negative error code in case of failure.
*/
static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
unsigned short vid)
@@ -632,7 +646,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
if (!vlan->kobj) {
ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
if (ret) {
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
return ret;
}
}
@@ -651,12 +665,13 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
/**
* batadv_interface_kill_vid - ndo_kill_vid API implementation
* @dev: the netdev of the mesh interface
+ * @proto: protocol of the the vlan id
* @vid: identifier of the deleted vlan
*
* Destroy all the internal structures used to handle the vlan identified by vid
* on top of the mesh interface
*
- * Returns 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q
+ * Return: 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q
* or -ENOENT if the specified vlan id wasn't registered.
*/
static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
@@ -678,7 +693,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
batadv_softif_destroy_vlan(bat_priv, vlan);
/* finally free the vlan object */
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
return 0;
}
@@ -734,7 +749,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
if (vlan) {
batadv_softif_destroy_vlan(bat_priv, vlan);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
}
batadv_sysfs_del_meshif(soft_iface);
@@ -745,7 +760,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
* batadv_softif_init_late - late stage initialization of soft interface
* @dev: registered network device to modify
*
- * Returns error code on failures
+ * Return: error code on failures
*/
static int batadv_softif_init_late(struct net_device *dev)
{
@@ -847,7 +862,7 @@ free_bat_counters:
* @dev: batadv_soft_interface used as master interface
* @slave_dev: net_device which should become the slave interface
*
- * Return 0 if successful or error otherwise.
+ * Return: 0 if successful or error otherwise.
*/
static int batadv_softif_slave_add(struct net_device *dev,
struct net_device *slave_dev)
@@ -863,7 +878,7 @@ static int batadv_softif_slave_add(struct net_device *dev,
out:
if (hard_iface)
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
return ret;
}
@@ -872,7 +887,7 @@ out:
* @dev: batadv_soft_interface used as master interface
* @slave_dev: net_device which should be removed from the master interface
*
- * Return 0 if successful or error otherwise.
+ * Return: 0 if successful or error otherwise.
*/
static int batadv_softif_slave_del(struct net_device *dev,
struct net_device *slave_dev)
@@ -890,7 +905,7 @@ static int batadv_softif_slave_del(struct net_device *dev,
out:
if (hard_iface)
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
return ret;
}
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 8e82176f40b1..9ae265703d23 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -34,7 +34,7 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
int batadv_softif_is_valid(const struct net_device *net_dev);
extern struct rtnl_link_ops batadv_link_ops;
int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
-void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan);
+void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
unsigned short vid);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index fe87777fda8a..4d70d4413e40 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -25,6 +25,7 @@
#include <linux/fs.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
+#include <linux/kref.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
@@ -64,7 +65,7 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj)
* batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv
* @obj: kobject to covert
*
- * Returns the associated batadv_priv struct.
+ * Return: the associated batadv_priv struct.
*/
static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
{
@@ -82,9 +83,10 @@ static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
/**
* batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct
+ * @bat_priv: the bat priv with all the soft interface information
* @obj: kobject to covert
*
- * Returns the associated softif_vlan struct if found, NULL otherwise.
+ * Return: the associated softif_vlan struct if found, NULL otherwise.
*/
static struct batadv_softif_vlan *
batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
@@ -96,7 +98,7 @@ batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
if (vlan_tmp->kobj != obj)
continue;
- if (!atomic_inc_not_zero(&vlan_tmp->refcount))
+ if (!kref_get_unless_zero(&vlan_tmp->refcount))
continue;
vlan = vlan_tmp;
@@ -214,7 +216,7 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \
attr, &vlan->_name, \
bat_priv->soft_iface); \
\
- batadv_softif_vlan_free_ref(vlan); \
+ batadv_softif_vlan_put(vlan); \
return res; \
}
@@ -229,7 +231,7 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \
atomic_read(&vlan->_name) == 0 ? \
"disabled" : "enabled"); \
\
- batadv_softif_vlan_free_ref(vlan); \
+ batadv_softif_vlan_put(vlan); \
return res; \
}
@@ -491,7 +493,7 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
* @attr: the batman-adv attribute the user is interacting with
* @buff: the buffer that will contain the data to send back to the user
*
- * Returns the number of bytes written into 'buff' on success or a negative
+ * Return: the number of bytes written into 'buff' on success or a negative
* error code in case of failure
*/
static ssize_t batadv_show_isolation_mark(struct kobject *kobj,
@@ -511,7 +513,7 @@ static ssize_t batadv_show_isolation_mark(struct kobject *kobj,
* @buff: the buffer containing the user data
* @count: number of bytes in the buffer
*
- * Returns 'count' on success or a negative error code in case of failure
+ * Return: 'count' on success or a negative error code in case of failure
*/
static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
struct attribute *attr, char *buff,
@@ -620,9 +622,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
-/**
- * batadv_vlan_attrs - array of vlan specific sysfs attributes
- */
+/* array of vlan specific sysfs attributes */
static struct batadv_attribute *batadv_vlan_attrs[] = {
&batadv_attr_vlan_ap_isolation,
NULL,
@@ -683,7 +683,7 @@ void batadv_sysfs_del_meshif(struct net_device *dev)
* @dev: netdev of the mesh interface
* @vlan: private data of the newly added VLAN interface
*
- * Returns 0 on success and -ENOMEM if any of the structure allocations fails.
+ * Return: 0 on success and -ENOMEM if any of the structure allocations fails.
*/
int batadv_sysfs_add_vlan(struct net_device *dev,
struct batadv_softif_vlan *vlan)
@@ -771,7 +771,7 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj,
length = sprintf(buff, "%s\n", ifname);
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
return length;
}
@@ -795,7 +795,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
if (strlen(buff) >= IFNAMSIZ) {
pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
buff);
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
return -EINVAL;
}
@@ -829,7 +829,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
unlock:
rtnl_unlock();
out:
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
return ret;
}
@@ -863,7 +863,7 @@ static ssize_t batadv_show_iface_status(struct kobject *kobj,
break;
}
- batadv_hardif_free_ref(hard_iface);
+ batadv_hardif_put(hard_iface);
return length;
}
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index 61974428a7af..c76021b4e198 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a22080c53401..0b43e86328a5 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
@@ -31,6 +31,7 @@
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -68,7 +69,15 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
unsigned short vid, const char *message,
bool roaming);
-/* returns 1 if they are the same mac addr and vid */
+/**
+ * batadv_compare_tt - check if two TT entries are the same
+ * @node: the list element pointer of the first TT entry
+ * @data2: pointer to the tt_common_entry of the second TT entry
+ *
+ * Compare the MAC address and the VLAN ID of the two TT entries and check if
+ * they are the same TT client.
+ * Return: 1 if the two TT clients are the same, 0 otherwise
+ */
static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_tt_common_entry,
@@ -84,7 +93,7 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
* @data: pointer to the tt_common_entry object to map
* @size: the size of the hash table
*
- * Returns the hash index where the object represented by 'data' should be
+ * Return: the hash index where the object represented by 'data' should be
* stored at.
*/
static inline u32 batadv_choose_tt(const void *data, u32 size)
@@ -105,7 +114,7 @@ static inline u32 batadv_choose_tt(const void *data, u32 size)
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
*
- * Returns a pointer to the tt_common struct belonging to the searched client if
+ * Return: a pointer to the tt_common struct belonging to the searched client if
* found, NULL otherwise.
*/
static struct batadv_tt_common_entry *
@@ -133,7 +142,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
if (tt->vid != vid)
continue;
- if (!atomic_inc_not_zero(&tt->refcount))
+ if (!kref_get_unless_zero(&tt->refcount))
continue;
tt_tmp = tt;
@@ -150,7 +159,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
*
- * Returns a pointer to the corresponding tt_local_entry struct if the client is
+ * Return: a pointer to the corresponding tt_local_entry struct if the client is
* found, NULL otherwise.
*/
static struct batadv_tt_local_entry *
@@ -175,7 +184,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
*
- * Returns a pointer to the corresponding tt_global_entry struct if the client
+ * Return: a pointer to the corresponding tt_global_entry struct if the client
* is found, NULL otherwise.
*/
static struct batadv_tt_global_entry *
@@ -194,34 +203,68 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
return tt_global_entry;
}
+/**
+ * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
+ * for free after rcu grace period
+ * @ref: kref pointer of the nc_node
+ */
+static void batadv_tt_local_entry_release(struct kref *ref)
+{
+ struct batadv_tt_local_entry *tt_local_entry;
+
+ tt_local_entry = container_of(ref, struct batadv_tt_local_entry,
+ common.refcount);
+
+ kfree_rcu(tt_local_entry, common.rcu);
+}
+
+/**
+ * batadv_tt_local_entry_put - decrement the tt_local_entry refcounter and
+ * possibly release it
+ * @tt_local_entry: tt_local_entry to be free'd
+ */
static void
-batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry)
+batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
+{
+ kref_put(&tt_local_entry->common.refcount,
+ batadv_tt_local_entry_release);
+}
+
+/**
+ * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
+ * for free after rcu grace period
+ * @ref: kref pointer of the nc_node
+ */
+static void batadv_tt_global_entry_release(struct kref *ref)
{
- if (atomic_dec_and_test(&tt_local_entry->common.refcount))
- kfree_rcu(tt_local_entry, common.rcu);
+ struct batadv_tt_global_entry *tt_global_entry;
+
+ tt_global_entry = container_of(ref, struct batadv_tt_global_entry,
+ common.refcount);
+
+ batadv_tt_global_del_orig_list(tt_global_entry);
+ kfree_rcu(tt_global_entry, common.rcu);
}
/**
- * batadv_tt_global_entry_free_ref - decrement the refcounter for a
- * tt_global_entry and possibly free it
- * @tt_global_entry: the object to free
+ * batadv_tt_global_entry_put - decrement the tt_global_entry refcounter and
+ * possibly release it
+ * @tt_global_entry: tt_global_entry to be free'd
*/
static void
-batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry)
+batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
{
- if (atomic_dec_and_test(&tt_global_entry->common.refcount)) {
- batadv_tt_global_del_orig_list(tt_global_entry);
- kfree_rcu(tt_global_entry, common.rcu);
- }
+ kref_put(&tt_global_entry->common.refcount,
+ batadv_tt_global_entry_release);
}
/**
* batadv_tt_global_hash_count - count the number of orig entries
- * @hash: hash table containing the tt entries
+ * @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to count entries for
* @vid: VLAN identifier
*
- * Return the number of originators advertising the given address/data
+ * Return: the number of originators advertising the given address/data
* (excluding ourself).
*/
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
@@ -235,25 +278,11 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
return 0;
count = atomic_read(&tt_global_entry->orig_list_count);
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
return count;
}
-static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_orig_list_entry *orig_entry;
-
- orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
-
- /* We are in an rcu callback here, therefore we cannot use
- * batadv_orig_node_free_ref() and its call_rcu():
- * An rcu_barrier() wouldn't wait for that to finish
- */
- batadv_orig_node_free_ref_now(orig_entry->orig_node);
- kfree(orig_entry);
-}
-
/**
* batadv_tt_local_size_mod - change the size by v of the local table identified
* by vid
@@ -272,7 +301,7 @@ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv,
atomic_add(v, &vlan->tt.num_entries);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
}
/**
@@ -300,9 +329,9 @@ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_global_size_mod - change the size by v of the local table
- * identified by vid
- * @bat_priv: the bat priv with all the soft interface information
+ * batadv_tt_global_size_mod - change the size by v of the global table
+ * for orig_node identified by vid
+ * @orig_node: the originator for which the table has to be modified
* @vid: the VLAN identifier
* @v: the amount to sum to the global table size
*/
@@ -317,12 +346,14 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
spin_lock_bh(&orig_node->vlan_list_lock);
- hlist_del_init_rcu(&vlan->list);
+ if (!hlist_unhashed(&vlan->list)) {
+ hlist_del_init_rcu(&vlan->list);
+ batadv_orig_node_vlan_put(vlan);
+ }
spin_unlock_bh(&orig_node->vlan_list_lock);
- batadv_orig_node_vlan_free_ref(vlan);
}
- batadv_orig_node_vlan_free_ref(vlan);
+ batadv_orig_node_vlan_put(vlan);
}
/**
@@ -349,13 +380,31 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
batadv_tt_global_size_mod(orig_node, vid, -1);
}
-static void
-batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
+/**
+ * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
+ * queue for free after rcu grace period
+ * @ref: kref pointer of the tt orig entry
+ */
+static void batadv_tt_orig_list_entry_release(struct kref *ref)
{
- if (!atomic_dec_and_test(&orig_entry->refcount))
- return;
+ struct batadv_tt_orig_list_entry *orig_entry;
+
+ orig_entry = container_of(ref, struct batadv_tt_orig_list_entry,
+ refcount);
- call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
+ batadv_orig_node_put(orig_entry->orig_node);
+ kfree_rcu(orig_entry, rcu);
+}
+
+/**
+ * batadv_tt_orig_list_entry_put - decrement the tt orig entry refcounter and
+ * possibly release it
+ * @orig_entry: tt orig entry to be free'd
+ */
+static void
+batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
+{
+ kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release);
}
/**
@@ -437,7 +486,7 @@ unlock:
* batadv_tt_len - compute length in bytes of given number of tt changes
* @changes_num: number of tt changes
*
- * Returns computed length in bytes.
+ * Return: computed length in bytes.
*/
static int batadv_tt_len(int changes_num)
{
@@ -448,7 +497,7 @@ static int batadv_tt_len(int changes_num)
* batadv_tt_entries - compute the number of entries fitting in tt_len bytes
* @tt_len: available space
*
- * Returns the number of entries.
+ * Return: the number of entries.
*/
static u16 batadv_tt_entries(u16 tt_len)
{
@@ -460,7 +509,7 @@ static u16 batadv_tt_entries(u16 tt_len)
* size when transmitted over the air
* @bat_priv: the bat priv with all the soft interface information
*
- * Returns local translation table size in bytes.
+ * Return: local translation table size in bytes.
*/
static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv)
{
@@ -512,7 +561,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
batadv_choose_tt, &tt_global->common);
- batadv_tt_global_entry_free_ref(tt_global);
+ batadv_tt_global_entry_put(tt_global);
}
/**
@@ -526,7 +575,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
* @mark: the value contained in the skb->mark field of the received packet (if
* any)
*
- * Returns true if the client was successfully added, false otherwise.
+ * Return: true if the client was successfully added, false otherwise.
*/
bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
unsigned short vid, int ifindex, u32 mark)
@@ -620,7 +669,8 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
tt_local->common.vid = vid;
if (batadv_is_wifi_netdev(in_dev))
tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
- atomic_set(&tt_local->common.refcount, 2);
+ kref_init(&tt_local->common.refcount);
+ kref_get(&tt_local->common.refcount);
tt_local->last_seen = jiffies;
tt_local->common.added_at = tt_local->last_seen;
@@ -637,8 +687,8 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (unlikely(hash_added != 0)) {
/* remove the reference for the hash */
- batadv_tt_local_entry_free_ref(tt_local);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_tt_local_entry_put(tt_local);
+ batadv_softif_vlan_put(vlan);
goto out;
}
@@ -704,9 +754,9 @@ out:
if (in_dev)
dev_put(in_dev);
if (tt_local)
- batadv_tt_local_entry_free_ref(tt_local);
+ batadv_tt_local_entry_put(tt_local);
if (tt_global)
- batadv_tt_global_entry_free_ref(tt_global);
+ batadv_tt_global_entry_put(tt_global);
return ret;
}
@@ -721,12 +771,11 @@ out:
* function reserves the amount of space needed to send the entire global TT
* table. In case of success the value is updated with the real amount of
* reserved bytes
-
* Allocate the needed amount of memory for the entire TT TVLV and write its
* header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
* objects, one per active VLAN served by the originator node.
*
- * Return the size of the allocated buffer or 0 in case of failure.
+ * Return: the size of the allocated buffer or 0 in case of failure.
*/
static u16
batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
@@ -800,7 +849,7 @@ out:
* header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
* objects, one per active VLAN.
*
- * Return the size of the allocated buffer or 0 in case of failure.
+ * Return: the size of the allocated buffer or 0 in case of failure.
*/
static u16
batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
@@ -1005,13 +1054,13 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
no_purge ? 0 : last_seen_msecs,
vlan->tt.crc);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
}
rcu_read_unlock();
}
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return 0;
}
@@ -1042,7 +1091,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
* @message: message to append to the log on deletion
* @roaming: true if the deletion is due to a roaming event
*
- * Returns the flags assigned to the local entry before being deleted
+ * Return: the flags assigned to the local entry before being deleted
*/
u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid, const char *message,
@@ -1088,19 +1137,19 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
goto out;
/* extra call to free the local tt entry */
- batadv_tt_local_entry_free_ref(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
/* decrease the reference held for this vlan */
vlan = batadv_softif_vlan_get(bat_priv, vid);
if (!vlan)
goto out;
- batadv_softif_vlan_free_ref(vlan);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
+ batadv_softif_vlan_put(vlan);
out:
if (tt_local_entry)
- batadv_tt_local_entry_free_ref(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return curr_flags;
}
@@ -1196,11 +1245,11 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
vlan = batadv_softif_vlan_get(bat_priv,
tt_common_entry->vid);
if (vlan) {
- batadv_softif_vlan_free_ref(vlan);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
+ batadv_softif_vlan_put(vlan);
}
- batadv_tt_local_entry_free_ref(tt_local);
+ batadv_tt_local_entry_put(tt_local);
}
spin_unlock_bh(list_lock);
}
@@ -1242,10 +1291,16 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv)
spin_unlock_bh(&bat_priv->tt.changes_list_lock);
}
-/* retrieves the orig_tt_list_entry belonging to orig_node from the
+/**
+ * batadv_tt_global_orig_entry_find - find a TT orig_list_entry
+ * @entry: the TT global entry where the orig_list_entry has to be
+ * extracted from
+ * @orig_node: the originator for which the orig_list_entry has to be found
+ *
+ * retrieve the orig_tt_list_entry belonging to orig_node from the
* batadv_tt_global_entry list
*
- * returns it with an increased refcounter, NULL if not found
+ * Return: it with an increased refcounter, NULL if not found
*/
static struct batadv_tt_orig_list_entry *
batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
@@ -1259,7 +1314,7 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
hlist_for_each_entry_rcu(tmp_orig_entry, head, list) {
if (tmp_orig_entry->orig_node != orig_node)
continue;
- if (!atomic_inc_not_zero(&tmp_orig_entry->refcount))
+ if (!kref_get_unless_zero(&tmp_orig_entry->refcount))
continue;
orig_entry = tmp_orig_entry;
@@ -1270,8 +1325,15 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
return orig_entry;
}
-/* find out if an orig_node is already in the list of a tt_global_entry.
- * returns true if found, false otherwise
+/**
+ * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled
+ * by a given originator
+ * @entry: the TT global entry to check
+ * @orig_node: the originator to search in the list
+ *
+ * find out if an orig_node is already in the list of a tt_global_entry.
+ *
+ * Return: true if found, false otherwise
*/
static bool
batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
@@ -1283,7 +1345,7 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node);
if (orig_entry) {
found = true;
- batadv_tt_orig_list_entry_free_ref(orig_entry);
+ batadv_tt_orig_list_entry_put(orig_entry);
}
return found;
@@ -1309,11 +1371,12 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
goto out;
INIT_HLIST_NODE(&orig_entry->list);
- atomic_inc(&orig_node->refcount);
+ kref_get(&orig_node->refcount);
batadv_tt_global_size_inc(orig_node, tt_global->common.vid);
orig_entry->orig_node = orig_node;
orig_entry->ttvn = ttvn;
- atomic_set(&orig_entry->refcount, 2);
+ kref_init(&orig_entry->refcount);
+ kref_get(&orig_entry->refcount);
spin_lock_bh(&tt_global->list_lock);
hlist_add_head_rcu(&orig_entry->list,
@@ -1323,7 +1386,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
out:
if (orig_entry)
- batadv_tt_orig_list_entry_free_ref(orig_entry);
+ batadv_tt_orig_list_entry_put(orig_entry);
}
/**
@@ -1343,7 +1406,7 @@ out:
*
* The caller must hold orig_node refcount.
*
- * Return true if the new entry has been added, false otherwise
+ * Return: true if the new entry has been added, false otherwise
*/
static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
@@ -1389,7 +1452,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
*/
if (flags & BATADV_TT_CLIENT_ROAM)
tt_global_entry->roam_at = jiffies;
- atomic_set(&common->refcount, 2);
+ kref_init(&common->refcount);
+ kref_get(&common->refcount);
common->added_at = jiffies;
INIT_HLIST_HEAD(&tt_global_entry->orig_list);
@@ -1403,7 +1467,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
if (unlikely(hash_added != 0)) {
/* remove the reference for the hash */
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
goto out_remove;
}
} else {
@@ -1489,9 +1553,9 @@ out_remove:
out:
if (tt_global_entry)
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
if (tt_local_entry)
- batadv_tt_local_entry_free_ref(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return ret;
}
@@ -1501,7 +1565,7 @@ out:
* @tt_global_entry: global translation table entry to be analyzed
*
* This functon assumes the caller holds rcu_read_lock().
- * Returns best originator list entry or NULL on errors.
+ * Return: best originator list entry or NULL on errors.
*/
static struct batadv_tt_orig_list_entry *
batadv_transtable_best_orig(struct batadv_priv *bat_priv,
@@ -1522,20 +1586,20 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
if (best_router &&
bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT,
best_router, BATADV_IF_DEFAULT) <= 0) {
- batadv_neigh_node_free_ref(router);
+ batadv_neigh_node_put(router);
continue;
}
/* release the refcount for the "old" best */
if (best_router)
- batadv_neigh_node_free_ref(best_router);
+ batadv_neigh_node_put(best_router);
best_entry = orig_entry;
best_router = router;
}
if (best_router)
- batadv_neigh_node_free_ref(best_router);
+ batadv_neigh_node_put(best_router);
return best_entry;
}
@@ -1588,7 +1652,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
- batadv_orig_node_vlan_free_ref(vlan);
+ batadv_orig_node_vlan_put(vlan);
}
print_list:
@@ -1620,7 +1684,7 @@ print_list:
((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
- batadv_orig_node_vlan_free_ref(vlan);
+ batadv_orig_node_vlan_put(vlan);
}
}
@@ -1661,7 +1725,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
}
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
return 0;
}
@@ -1689,7 +1753,7 @@ _batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry,
* being part of a list
*/
hlist_del_rcu(&orig_entry->list);
- batadv_tt_orig_list_entry_free_ref(orig_entry);
+ batadv_tt_orig_list_entry_put(orig_entry);
}
/* deletes the orig list of a tt_global_entry */
@@ -1845,9 +1909,9 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
out:
if (tt_global_entry)
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
if (local_entry)
- batadv_tt_local_entry_free_ref(local_entry);
+ batadv_tt_local_entry_put(local_entry);
}
/**
@@ -1901,7 +1965,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
tt_global->common.addr,
BATADV_PRINT_VID(vid), message);
hlist_del_rcu(&tt_common_entry->hash_entry);
- batadv_tt_global_entry_free_ref(tt_global);
+ batadv_tt_global_entry_put(tt_global);
}
}
spin_unlock_bh(list_lock);
@@ -1964,7 +2028,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
hlist_del_rcu(&tt_common->hash_entry);
- batadv_tt_global_entry_free_ref(tt_global);
+ batadv_tt_global_entry_put(tt_global);
}
spin_unlock_bh(list_lock);
}
@@ -1996,7 +2060,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
tt_global = container_of(tt_common_entry,
struct batadv_tt_global_entry,
common);
- batadv_tt_global_entry_free_ref(tt_global);
+ batadv_tt_global_entry_put(tt_global);
}
spin_unlock_bh(list_lock);
}
@@ -2031,7 +2095,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
* @addr: mac address of the destination client
* @vid: VLAN identifier
*
- * Returns a pointer to the originator that was selected as destination in the
+ * Return: a pointer to the originator that was selected as destination in the
* mesh for contacting the client 'addr', NULL otherwise.
* In case of multiple originators serving the same client, the function returns
* the best one (best in terms of metric towards the destination node).
@@ -2071,15 +2135,15 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
/* found anything? */
if (best_entry)
orig_node = best_entry->orig_node;
- if (orig_node && !atomic_inc_not_zero(&orig_node->refcount))
+ if (orig_node && !kref_get_unless_zero(&orig_node->refcount))
orig_node = NULL;
rcu_read_unlock();
out:
if (tt_global_entry)
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
if (tt_local_entry)
- batadv_tt_local_entry_free_ref(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return orig_node;
}
@@ -2106,7 +2170,7 @@ out:
* because the XOR operation can combine them all while trying to reduce the
* noise as much as possible.
*
- * Returns the checksum of the global table of a given originator.
+ * Return: the checksum of the global table of a given originator.
*/
static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
@@ -2183,7 +2247,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
* For details about the computation, please refer to the documentation for
* batadv_tt_global_crc().
*
- * Returns the checksum of the local table
+ * Return: the checksum of the local table
*/
static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
unsigned short vid)
@@ -2289,7 +2353,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node this request is being issued for
*
- * Returns the pointer to the new tt_req_node struct if no request
+ * Return: the pointer to the new tt_req_node struct if no request
* has already been issued for this orig_node, NULL otherwise.
*/
static struct batadv_tt_req_node *
@@ -2324,7 +2388,7 @@ unlock:
* @entry_ptr: to be checked local tt entry
* @data_ptr: not used but definition required to satisfy the callback prototype
*
- * Returns 1 if the entry is a valid, 0 otherwise.
+ * Return: 1 if the entry is a valid, 0 otherwise.
*/
static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
{
@@ -2408,9 +2472,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
* @orig_node: originator for which the CRCs have to be checked
* @tt_vlan: pointer to the first tvlv VLAN entry
* @num_vlan: number of tvlv VLAN entries
- * @create: if true, create VLAN objects if not found
*
- * Return true if all the received CRCs match the locally stored ones, false
+ * Return: true if all the received CRCs match the locally stored ones, false
* otherwise
*/
static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
@@ -2440,7 +2503,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
return false;
crc = vlan->tt.crc;
- batadv_orig_node_vlan_free_ref(vlan);
+ batadv_orig_node_vlan_put(vlan);
if (crc != ntohl(tt_vlan_tmp->crc))
return false;
@@ -2513,6 +2576,8 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
* @num_vlan: number of tvlv VLAN entries
* @full_table: ask for the entire translation table if true, while only for the
* last TT diff otherwise
+ *
+ * Return: true if the TT Request was sent, false otherwise
*/
static int batadv_send_tt_request(struct batadv_priv *bat_priv,
struct batadv_orig_node *dst_orig_node,
@@ -2573,7 +2638,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
if (ret && tt_req_node) {
spin_lock_bh(&bat_priv->tt.req_list_lock);
/* hlist_del_init() verifies tt_req_node still is in the list */
@@ -2593,7 +2658,7 @@ out:
* @req_src: mac address of tt request sender
* @req_dst: mac address of tt request recipient
*
- * Returns true if tt request reply was sent, false otherwise.
+ * Return: true if tt request reply was sent, false otherwise.
*/
static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
@@ -2711,9 +2776,9 @@ unlock:
out:
if (res_dst_orig_node)
- batadv_orig_node_free_ref(res_dst_orig_node);
+ batadv_orig_node_put(res_dst_orig_node);
if (req_dst_orig_node)
- batadv_orig_node_free_ref(req_dst_orig_node);
+ batadv_orig_node_put(req_dst_orig_node);
kfree(tvlv_tt_data);
return ret;
}
@@ -2725,7 +2790,7 @@ out:
* @tt_data: tt data containing the tt request information
* @req_src: mac address of tt request sender
*
- * Returns true if tt request reply was sent, false otherwise.
+ * Return: true if tt request reply was sent, false otherwise.
*/
static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
@@ -2828,9 +2893,9 @@ unlock:
out:
spin_unlock_bh(&bat_priv->tt.commit_lock);
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
kfree(tvlv_tt_data);
/* The packet was for this host, so it doesn't need to be re-routed */
return true;
@@ -2843,7 +2908,7 @@ out:
* @req_src: mac address of tt request sender
* @req_dst: mac address of tt request recipient
*
- * Returns true if tt request reply was sent, false otherwise.
+ * Return: true if tt request reply was sent, false otherwise.
*/
static bool batadv_send_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
@@ -2916,7 +2981,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
out:
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
}
static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
@@ -2938,7 +3003,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
* @addr: the mac address of the client to check
* @vid: VLAN identifier
*
- * Returns true if the client is served by this node, false otherwise.
+ * Return: true if the client is served by this node, false otherwise.
*/
bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid)
@@ -2958,7 +3023,7 @@ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
ret = true;
out:
if (tt_local_entry)
- batadv_tt_local_entry_free_ref(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return ret;
}
@@ -3022,7 +3087,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
spin_unlock_bh(&bat_priv->tt.req_list_lock);
out:
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
}
static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
@@ -3055,11 +3120,16 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
spin_unlock_bh(&bat_priv->tt.roam_list_lock);
}
-/* This function checks whether the client already reached the
+/**
+ * batadv_tt_check_roam_count - check if a client has roamed too frequently
+ * @bat_priv: the bat priv with all the soft interface information
+ * @client: mac address of the roaming client
+ *
+ * This function checks whether the client already reached the
* maximum number of possible roaming phases. In this case the ROAMING_ADV
* will not be sent.
*
- * returns true if the ROAMING_ADV can be sent, false otherwise
+ * Return: true if the ROAMING_ADV can be sent, false otherwise
*/
static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client)
{
@@ -3148,7 +3218,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
out:
if (primary_if)
- batadv_hardif_free_ref(primary_if);
+ batadv_hardif_put(primary_if);
}
static void batadv_tt_purge(struct work_struct *work)
@@ -3272,11 +3342,11 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
/* decrease the reference held for this vlan */
vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
if (vlan) {
- batadv_softif_vlan_free_ref(vlan);
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
+ batadv_softif_vlan_put(vlan);
}
- batadv_tt_local_entry_free_ref(tt_local);
+ batadv_tt_local_entry_put(tt_local);
}
spin_unlock_bh(list_lock);
}
@@ -3359,11 +3429,11 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
ret = true;
out:
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_put(vlan);
if (tt_global_entry)
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
if (tt_local_entry)
- batadv_tt_local_entry_free_ref(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return ret;
}
@@ -3371,13 +3441,12 @@ out:
* batadv_tt_update_orig - update global translation table with new tt
* information received via ogms
* @bat_priv: the bat priv with all the soft interface information
- * @orig: the orig_node of the ogm
- * @tt_vlan: pointer to the first tvlv VLAN entry
+ * @orig_node: the orig_node of the ogm
+ * @tt_buff: pointer to the first tvlv VLAN entry
* @tt_num_vlan: number of tvlv VLAN entries
* @tt_change: pointer to the first entry in the TT buffer
* @tt_num_changes: number of tt changes inside the tt buffer
* @ttvn: translation table version number of this changeset
- * @tt_crc: crc32 checksum of orig node's translation table
*/
static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
@@ -3459,7 +3528,7 @@ request_table:
* @addr: the mac address of the client to check
* @vid: VLAN identifier
*
- * Returns true if we know that the client has moved from its old originator
+ * Return: true if we know that the client has moved from its old originator
* to another one. This entry is still kept for consistency purposes and will be
* deleted later by a DEL or because of timeout
*/
@@ -3474,7 +3543,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
goto out;
ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM;
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
out:
return ret;
}
@@ -3485,7 +3554,7 @@ out:
* @addr: the mac address of the local client to query
* @vid: VLAN identifier
*
- * Returns true if the local client is known to be roaming (it is not served by
+ * Return: true if the local client is known to be roaming (it is not served by
* this node anymore) or not. If yes, the client is still present in the table
* to keep the latter consistent with the node TTVN
*/
@@ -3500,7 +3569,7 @@ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
goto out;
ret = tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM;
- batadv_tt_local_entry_free_ref(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
out:
return ret;
}
@@ -3614,7 +3683,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
* @tvlv_value: tvlv buffer containing the tt data
* @tvlv_value_len: tvlv buffer length
*
- * Returns NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS
+ * Return: NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS
* otherwise.
*/
static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
@@ -3695,7 +3764,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
* @tvlv_value: tvlv buffer containing the tt data
* @tvlv_value_len: tvlv buffer length
*
- * Returns NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS
+ * Return: NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS
* otherwise.
*/
static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
@@ -3733,7 +3802,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
out:
if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_put(orig_node);
return NET_RX_SUCCESS;
}
@@ -3741,7 +3810,7 @@ out:
* batadv_tt_init - initialise the translation table internals
* @bat_priv: the bat priv with all the soft interface information
*
- * Return 0 on success or negative error number in case of failure.
+ * Return: 0 on success or negative error number in case of failure.
*/
int batadv_tt_init(struct batadv_priv *bat_priv)
{
@@ -3779,7 +3848,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
* @addr: the mac address of the client
* @vid: the identifier of the VLAN where this client is connected
*
- * Returns true if the client is marked with the TT_CLIENT_ISOLA flag, false
+ * Return: true if the client is marked with the TT_CLIENT_ISOLA flag, false
* otherwise
*/
bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
@@ -3794,7 +3863,7 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
ret = tt->common.flags & BATADV_TT_CLIENT_ISOLA;
- batadv_tt_global_entry_free_ref(tt);
+ batadv_tt_global_entry_put(tt);
return ret;
}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index abd8e116e5fb..7c7e2c006bfe 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 3437b667a2cd..612de23178e6 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -25,6 +25,7 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/if_ether.h>
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/sched.h> /* for linux/wait.h */
#include <linux/spinlock.h>
@@ -73,7 +74,7 @@ enum batadv_dhcp_recipient {
#define BATADV_TT_SYNC_MASK 0x00F0
/**
- * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data
+ * struct batadv_hard_iface_bat_iv - per hard-interface B.A.T.M.A.N. IV data
* @ogm_buff: buffer holding the OGM packet
* @ogm_buff_len: length of the OGM packet buffer
* @ogm_seqno: OGM sequence number - used to identify each OGM
@@ -97,8 +98,8 @@ struct batadv_hard_iface_bat_iv {
* batman-adv for this interface
* @soft_iface: the batman-adv interface which uses this network interface
* @rcu: struct used for freeing in an RCU-safe manner
- * @bat_iv: BATMAN IV specific per hard interface data
- * @cleanup_work: work queue callback item for hard interface deinit
+ * @bat_iv: per hard-interface B.A.T.M.A.N. IV data
+ * @cleanup_work: work queue callback item for hard-interface deinit
* @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
* @neigh_list: list of unique single hop neighbors via this interface
* @neigh_list_lock: lock protecting neigh_list
@@ -110,7 +111,7 @@ struct batadv_hard_iface {
struct net_device *net_dev;
u8 num_bcasts;
struct kobject *hardif_obj;
- atomic_t refcount;
+ struct kref refcount;
struct packet_type batman_adv_ptype;
struct net_device *soft_iface;
struct rcu_head rcu;
@@ -125,7 +126,7 @@ struct batadv_hard_iface {
/**
* struct batadv_orig_ifinfo - originator info per outgoing interface
* @list: list node for orig_node::ifinfo_list
- * @if_outgoing: pointer to outgoing hard interface
+ * @if_outgoing: pointer to outgoing hard-interface
* @router: router that should be used to reach this originator
* @last_real_seqno: last and best known sequence number
* @last_ttl: ttl of last received packet
@@ -140,7 +141,7 @@ struct batadv_orig_ifinfo {
u32 last_real_seqno;
u8 last_ttl;
unsigned long batman_seqno_reset;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
@@ -196,13 +197,13 @@ struct batadv_orig_node_vlan {
unsigned short vid;
struct batadv_vlan_tt tt;
struct hlist_node list;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
/**
* struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members
- * @bcast_own: set of bitfields (one per hard interface) where each one counts
+ * @bcast_own: set of bitfields (one per hard-interface) where each one counts
* the number of our OGMs this orig_node rebroadcasted "back" to us (relative
* to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
* @bcast_own_sum: sum of bcast_own
@@ -298,7 +299,7 @@ struct batadv_orig_node {
struct batadv_priv *bat_priv;
/* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */
spinlock_t bcast_seqno_lock;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
#ifdef CONFIG_BATMAN_ADV_NC
struct list_head in_coding_list;
@@ -341,15 +342,16 @@ struct batadv_gw_node {
struct batadv_orig_node *orig_node;
u32 bandwidth_down;
u32 bandwidth_up;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
/**
- * batadv_hardif_neigh_node - unique neighbor per hard interface
+ * struct batadv_hardif_neigh_node - unique neighbor per hard-interface
* @list: list node for batadv_hard_iface::neigh_list
* @addr: the MAC address of the neighboring interface
- * @if_incoming: pointer to incoming hard interface
+ * @if_incoming: pointer to incoming hard-interface
+ * @last_seen: when last packet via this neighbor was received
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in a RCU-safe manner
*/
@@ -358,7 +360,7 @@ struct batadv_hardif_neigh_node {
u8 addr[ETH_ALEN];
struct batadv_hard_iface *if_incoming;
unsigned long last_seen;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
@@ -369,7 +371,7 @@ struct batadv_hardif_neigh_node {
* @addr: the MAC address of the neighboring interface
* @ifinfo_list: list for routing metrics per outgoing interface
* @ifinfo_lock: lock protecting private ifinfo members and list
- * @if_incoming: pointer to incoming hard interface
+ * @if_incoming: pointer to incoming hard-interface
* @last_seen: when last packet via this neighbor was received
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
@@ -382,13 +384,13 @@ struct batadv_neigh_node {
spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */
struct batadv_hard_iface *if_incoming;
unsigned long last_seen;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
/**
* struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing
- * interface for BATMAN IV
+ * interface for B.A.T.M.A.N. IV
* @tq_recv: ring buffer of received TQ values from this neigh node
* @tq_index: ring buffer index
* @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
@@ -407,7 +409,7 @@ struct batadv_neigh_ifinfo_bat_iv {
/**
* struct batadv_neigh_ifinfo - neighbor information per outgoing interface
* @list: list node for batadv_neigh_node::ifinfo_list
- * @if_outgoing: pointer to outgoing hard interface
+ * @if_outgoing: pointer to outgoing hard-interface
* @bat_iv: B.A.T.M.A.N. IV private structure
* @last_ttl: last received ttl from this neigh node
* @refcount: number of contexts the object is used
@@ -418,7 +420,7 @@ struct batadv_neigh_ifinfo {
struct batadv_hard_iface *if_outgoing;
struct batadv_neigh_ifinfo_bat_iv bat_iv;
u8 last_ttl;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
@@ -744,7 +746,7 @@ struct batadv_softif_vlan {
atomic_t ap_isolation; /* boolean */
struct batadv_vlan_tt tt;
struct hlist_node list;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
@@ -771,6 +773,9 @@ struct batadv_softif_vlan {
* @orig_interval: OGM broadcast interval in milliseconds
* @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
* @log_level: configured log level (see batadv_dbg_level)
+ * @isolation_mark: the skb->mark value used to match packets for AP isolation
+ * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be used
+ * for the isolation mark
* @bcast_seqno: last sent broadcast packet sequence number
* @bcast_queue_left: number of remaining buffered broadcast packet slots
* @batman_queue_left: number of remaining OGM packet slots
@@ -783,8 +788,8 @@ struct batadv_softif_vlan {
* @forw_bat_list_lock: lock protecting forw_bat_list
* @forw_bcast_list_lock: lock protecting forw_bcast_list
* @orig_work: work queue callback item for orig node purging
- * @cleanup_work: work queue callback item for soft interface deinit
- * @primary_if: one of the hard interfaces assigned to this mesh interface
+ * @cleanup_work: work queue callback item for soft-interface deinit
+ * @primary_if: one of the hard-interfaces assigned to this mesh interface
* becomes the primary interface
* @bat_algo_ops: routing algorithm used by this mesh interface
* @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
@@ -925,7 +930,7 @@ struct batadv_bla_backbone_gw {
atomic_t request_sent;
u16 crc;
spinlock_t crc_lock; /* protects crc */
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
@@ -946,7 +951,7 @@ struct batadv_bla_claim {
unsigned long lasttime;
struct hlist_node hash_entry;
struct rcu_head rcu;
- atomic_t refcount;
+ struct kref refcount;
};
#endif
@@ -967,7 +972,7 @@ struct batadv_tt_common_entry {
struct hlist_node hash_entry;
u16 flags;
unsigned long added_at;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
@@ -1009,7 +1014,7 @@ struct batadv_tt_orig_list_entry {
struct batadv_orig_node *orig_node;
u8 ttvn;
struct hlist_node list;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
@@ -1062,7 +1067,7 @@ struct batadv_tt_roam_node {
struct batadv_nc_node {
struct list_head list;
u8 addr[ETH_ALEN];
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
struct batadv_orig_node *orig_node;
unsigned long last_seen;
@@ -1082,7 +1087,7 @@ struct batadv_nc_node {
struct batadv_nc_path {
struct hlist_node hash_entry;
struct rcu_head rcu;
- atomic_t refcount;
+ struct kref refcount;
struct list_head packet_list;
spinlock_t packet_list_lock; /* Protects packet_list */
u8 next_hop[ETH_ALEN];
@@ -1225,7 +1230,7 @@ struct batadv_dat_entry {
unsigned short vid;
unsigned long last_update;
struct hlist_node hash_entry;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
@@ -1261,7 +1266,7 @@ struct batadv_dat_candidate {
struct batadv_tvlv_container {
struct hlist_node list;
struct batadv_tvlv_hdr tvlv_hdr;
- atomic_t refcount;
+ struct kref refcount;
};
/**
@@ -1288,7 +1293,7 @@ struct batadv_tvlv_handler {
u8 type;
u8 version;
u8 flags;
- atomic_t refcount;
+ struct kref refcount;
struct rcu_head rcu;
};
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index d040365ba98e..8a4cc2f7f0db 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
/* check that it's our buffer */
if (lowpan_is_ipv6(*skb_network_header(skb))) {
+ /* Pull off the 1-byte of 6lowpan header. */
+ skb_pull(skb, 1);
+
/* Copy the packet so that the IPv6 header is
* properly aligned.
*/
@@ -317,6 +320,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->protocol = htons(ETH_P_IPV6);
local_skb->pkt_type = PACKET_HOST;
+ local_skb->dev = dev;
skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
@@ -335,6 +339,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
if (!local_skb)
goto drop;
+ local_skb->dev = dev;
+
ret = iphc_decompress(local_skb, dev, chan);
if (ret < 0) {
kfree_skb(local_skb);
@@ -343,7 +349,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->protocol = htons(ETH_P_IPV6);
local_skb->pkt_type = PACKET_HOST;
- local_skb->dev = dev;
if (give_skb_to_upper(local_skb, dev)
!= NET_RX_SUCCESS) {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 47bcef754796..883c821a9e78 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -4112,8 +4112,10 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
break;
}
- *req_complete = bt_cb(skb)->hci.req_complete;
- *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+ if (bt_cb(skb)->hci.req_flags & HCI_REQ_SKB)
+ *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+ else
+ *req_complete = bt_cb(skb)->hci.req_complete;
kfree_skb(skb);
}
spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 41b5f3813f02..c78ee2dc9323 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -688,21 +688,29 @@ static u8 update_white_list(struct hci_request *req)
* command to remove it from the controller.
*/
list_for_each_entry(b, &hdev->le_white_list, list) {
- struct hci_cp_le_del_from_white_list cp;
+ /* If the device is neither in pend_le_conns nor
+ * pend_le_reports then remove it from the whitelist.
+ */
+ if (!hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &b->bdaddr, b->bdaddr_type) &&
+ !hci_pend_le_action_lookup(&hdev->pend_le_reports,
+ &b->bdaddr, b->bdaddr_type)) {
+ struct hci_cp_le_del_from_white_list cp;
+
+ cp.bdaddr_type = b->bdaddr_type;
+ bacpy(&cp.bdaddr, &b->bdaddr);
- if (hci_pend_le_action_lookup(&hdev->pend_le_conns,
- &b->bdaddr, b->bdaddr_type) ||
- hci_pend_le_action_lookup(&hdev->pend_le_reports,
- &b->bdaddr, b->bdaddr_type)) {
- white_list_entries++;
+ hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
+ sizeof(cp), &cp);
continue;
}
- cp.bdaddr_type = b->bdaddr_type;
- bacpy(&cp.bdaddr, &b->bdaddr);
+ if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
+ /* White list can not be used with RPAs */
+ return 0x00;
+ }
- hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
- sizeof(cp), &cp);
+ white_list_entries++;
}
/* Since all no longer valid white list entries have been
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 39a5149f3010..eb4f5f24cbe3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -197,10 +197,20 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm)
chan->sport = psm;
err = 0;
} else {
- u16 p;
+ u16 p, start, end, incr;
+
+ if (chan->src_type == BDADDR_BREDR) {
+ start = L2CAP_PSM_DYN_START;
+ end = L2CAP_PSM_AUTO_END;
+ incr = 2;
+ } else {
+ start = L2CAP_PSM_LE_DYN_START;
+ end = L2CAP_PSM_LE_DYN_END;
+ incr = 1;
+ }
err = -EINVAL;
- for (p = 0x1001; p < 0x1100; p += 2)
+ for (p = start; p <= end; p += incr)
if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
chan->psm = cpu_to_le16(p);
chan->sport = cpu_to_le16(p);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1bb551527044..e4cae72895a7 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -58,7 +58,7 @@ static int l2cap_validate_bredr_psm(u16 psm)
return -EINVAL;
/* Restrict usage of well-known PSMs */
- if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE))
+ if (psm < L2CAP_PSM_DYN_START && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
return 0;
@@ -67,11 +67,11 @@ static int l2cap_validate_bredr_psm(u16 psm)
static int l2cap_validate_le_psm(u16 psm)
{
/* Valid LE_PSM ranges are defined only until 0x00ff */
- if (psm > 0x00ff)
+ if (psm > L2CAP_PSM_LE_DYN_END)
return -EINVAL;
/* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */
- if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE))
+ if (psm < L2CAP_PSM_LE_DYN_START && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
return 0;
@@ -125,6 +125,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
goto done;
}
+ bacpy(&chan->src, &la.l2_bdaddr);
+ chan->src_type = la.l2_bdaddr_type;
+
if (la.l2_cid)
err = l2cap_add_scid(chan, __le16_to_cpu(la.l2_cid));
else
@@ -156,9 +159,6 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
break;
}
- bacpy(&chan->src, &la.l2_bdaddr);
- chan->src_type = la.l2_bdaddr_type;
-
if (chan->psm && bdaddr_type_is_le(chan->src_type))
chan->mode = L2CAP_MODE_LE_FLOWCTL;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index ffed8a1d4f27..4b175df35184 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1072,22 +1072,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
hcon->dst_type = smp->remote_irk->addr_type;
queue_work(hdev->workqueue, &conn->id_addr_update_work);
}
-
- /* When receiving an indentity resolving key for
- * a remote device that does not use a resolvable
- * private address, just remove the key so that
- * it is possible to use the controller white
- * list for scanning.
- *
- * Userspace will have been told to not store
- * this key at this point. So it is safe to
- * just remove it.
- */
- if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
- list_del_rcu(&smp->remote_irk->list);
- kfree_rcu(smp->remote_irk, rcu);
- smp->remote_irk = NULL;
- }
}
if (smp->csrk) {
diff --git a/net/bridge/br.c b/net/bridge/br.c
index a1abe4936fe1..3addc05b9a16 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
+/* called with RTNL */
static int br_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused,
struct switchdev_notifier_fdb_info *fdb_info;
int err = NOTIFY_DONE;
- rtnl_lock();
p = br_port_get_rtnl(dev);
if (!p)
goto out;
@@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused,
}
out:
- rtnl_unlock();
return err;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5e88d3e17546..2c8095a5d824 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -28,6 +28,8 @@
const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_br_ops);
+static struct lock_class_key bridge_netdev_addr_lock_key;
+
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
@@ -87,6 +89,11 @@ out:
return NETDEV_TX_OK;
}
+static void br_set_lockdep_class(struct net_device *dev)
+{
+ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
+}
+
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -99,6 +106,7 @@ static int br_dev_init(struct net_device *dev)
err = br_vlan_init(br);
if (err)
free_percpu(br->stats);
+ br_set_lockdep_class(dev);
return err;
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c367b3e1b5ac..a73df3315df9 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -36,10 +36,10 @@
*/
static int port_cost(struct net_device *dev)
{
- struct ethtool_cmd ecmd;
+ struct ethtool_link_ksettings ecmd;
- if (!__ethtool_get_settings(dev, &ecmd)) {
- switch (ethtool_cmd_speed(&ecmd)) {
+ if (!__ethtool_get_link_ksettings(dev, &ecmd)) {
+ switch (ecmd.base.speed) {
case SPEED_10000:
return 2;
case SPEED_1000:
@@ -223,6 +223,31 @@ static void destroy_nbp_rcu(struct rcu_head *head)
destroy_nbp(p);
}
+static unsigned get_max_headroom(struct net_bridge *br)
+{
+ unsigned max_headroom = 0;
+ struct net_bridge_port *p;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ unsigned dev_headroom = netdev_get_fwd_headroom(p->dev);
+
+ if (dev_headroom > max_headroom)
+ max_headroom = dev_headroom;
+ }
+
+ return max_headroom;
+}
+
+static void update_headroom(struct net_bridge *br, int new_hr)
+{
+ struct net_bridge_port *p;
+
+ list_for_each_entry(p, &br->port_list, list)
+ netdev_set_rx_headroom(p->dev, new_hr);
+
+ br->dev->needed_headroom = new_hr;
+}
+
/* Delete port(interface) from bridge is done in two steps.
* via RCU. First step, marks device as down. That deletes
* all the timers and stops new packets from flowing through.
@@ -248,6 +273,9 @@ static void del_nbp(struct net_bridge_port *p)
br_ifinfo_notify(RTM_DELLINK, p);
list_del_rcu(&p->list);
+ if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
+ update_headroom(br, get_max_headroom(br));
+ netdev_reset_rx_headroom(dev);
nbp_vlan_flush(p);
br_fdb_delete_by_port(br, p, 0, 1);
@@ -438,6 +466,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
{
struct net_bridge_port *p;
int err = 0;
+ unsigned br_hr, dev_hr;
bool changed_addr;
/* Don't allow bridging non-ethernet like devices, or DSA-enabled
@@ -505,8 +534,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
netdev_update_features(br->dev);
- if (br->dev->needed_headroom < dev->needed_headroom)
- br->dev->needed_headroom = dev->needed_headroom;
+ br_hr = br->dev->needed_headroom;
+ dev_hr = netdev_get_fwd_headroom(dev);
+ if (br_hr < dev_hr)
+ update_headroom(br, dev_hr);
+ else
+ netdev_set_rx_headroom(dev, br_hr);
if (br_fdb_insert(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 30e105f57f0d..253bc77eda3b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -20,7 +20,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *p;
- struct nlattr *nest;
+ struct nlattr *nest, *port_nest;
if (!br->multicast_router || hlist_empty(&br->router_list))
return 0;
@@ -30,8 +30,20 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
return -EMSGSIZE;
hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
- if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex))
+ if (!p)
+ continue;
+ port_nest = nla_nest_start(skb, MDBA_ROUTER_PORT);
+ if (!port_nest)
+ goto fail;
+ if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) ||
+ nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
+ br_timer_value(&p->multicast_router_timer)) ||
+ nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
+ p->multicast_router)) {
+ nla_nest_cancel(skb, port_nest);
goto fail;
+ }
+ nla_nest_end(skb, port_nest);
}
nla_nest_end(skb, nest);
@@ -41,6 +53,14 @@ fail:
return -EMSGSIZE;
}
+static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
+{
+ e->state = flags & MDB_PG_FLAGS_PERMANENT;
+ e->flags = 0;
+ if (flags & MDB_PG_FLAGS_OFFLOAD)
+ e->flags |= MDB_FLAGS_OFFLOAD;
+}
+
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
@@ -80,26 +100,41 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
for (pp = &mp->ports;
(p = rcu_dereference(*pp)) != NULL;
pp = &p->next) {
+ struct nlattr *nest_ent;
+ struct br_mdb_entry e;
+
port = p->port;
- if (port) {
- struct br_mdb_entry e;
- memset(&e, 0, sizeof(e));
- e.ifindex = port->dev->ifindex;
- e.state = p->state;
- e.vid = p->addr.vid;
- if (p->addr.proto == htons(ETH_P_IP))
- e.addr.u.ip4 = p->addr.u.ip4;
+ if (!port)
+ continue;
+
+ memset(&e, 0, sizeof(e));
+ e.ifindex = port->dev->ifindex;
+ e.vid = p->addr.vid;
+ __mdb_entry_fill_flags(&e, p->flags);
+ if (p->addr.proto == htons(ETH_P_IP))
+ e.addr.u.ip4 = p->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- if (p->addr.proto == htons(ETH_P_IPV6))
- e.addr.u.ip6 = p->addr.u.ip6;
+ if (p->addr.proto == htons(ETH_P_IPV6))
+ e.addr.u.ip6 = p->addr.u.ip6;
#endif
- e.addr.proto = p->addr.proto;
- if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) {
- nla_nest_cancel(skb, nest2);
- err = -EMSGSIZE;
- goto out;
- }
+ e.addr.proto = p->addr.proto;
+ nest_ent = nla_nest_start(skb,
+ MDBA_MDB_ENTRY_INFO);
+ if (!nest_ent) {
+ nla_nest_cancel(skb, nest2);
+ err = -EMSGSIZE;
+ goto out;
+ }
+ if (nla_put_nohdr(skb, sizeof(e), &e) ||
+ nla_put_u32(skb,
+ MDBA_MDB_EATTR_TIMER,
+ br_timer_value(&p->timer))) {
+ nla_nest_cancel(skb, nest_ent);
+ nla_nest_cancel(skb, nest2);
+ err = -EMSGSIZE;
+ goto out;
}
+ nla_nest_end(skb, nest_ent);
}
nla_nest_end(skb, nest2);
skip:
@@ -209,7 +244,7 @@ static inline size_t rtnl_mdb_nlmsg_size(void)
}
static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
- int type)
+ int type, struct net_bridge_port_group *pg)
{
struct switchdev_obj_port_mdb mdb = {
.obj = {
@@ -232,10 +267,13 @@ static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
#endif
mdb.obj.orig_dev = port_dev;
- if (port_dev && type == RTM_NEWMDB)
- switchdev_port_obj_add(port_dev, &mdb.obj);
- else if (port_dev && type == RTM_DELMDB)
+ if (port_dev && type == RTM_NEWMDB) {
+ err = switchdev_port_obj_add(port_dev, &mdb.obj);
+ if (!err && pg)
+ pg->flags |= MDB_PG_FLAGS_OFFLOAD;
+ } else if (port_dev && type == RTM_DELMDB) {
switchdev_port_obj_del(port_dev, &mdb.obj);
+ }
skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
if (!skb)
@@ -253,21 +291,21 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
-void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type, u8 state)
+void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg,
+ int type)
{
struct br_mdb_entry entry;
memset(&entry, 0, sizeof(entry));
- entry.ifindex = port->dev->ifindex;
- entry.addr.proto = group->proto;
- entry.addr.u.ip4 = group->u.ip4;
+ entry.ifindex = pg->port->dev->ifindex;
+ entry.addr.proto = pg->addr.proto;
+ entry.addr.u.ip4 = pg->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- entry.addr.u.ip6 = group->u.ip6;
+ entry.addr.u.ip6 = pg->addr.u.ip6;
#endif
- entry.state = state;
- entry.vid = group->vid;
- __br_mdb_notify(dev, &entry, type);
+ entry.vid = pg->addr.vid;
+ __mdb_entry_fill_flags(&entry, pg->flags);
+ __br_mdb_notify(dev, &entry, type, pg);
}
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
@@ -412,7 +450,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group, unsigned char state)
+ struct br_ip *group, unsigned char state,
+ struct net_bridge_port_group **pg)
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
@@ -425,8 +464,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
mp = br_mdb_ip_get(mdb, group);
if (!mp) {
mp = br_multicast_new_group(br, port, group);
- err = PTR_ERR(mp);
- if (IS_ERR(mp))
+ err = PTR_ERR_OR_ZERO(mp);
+ if (err)
return err;
}
@@ -443,6 +482,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
if (unlikely(!p))
return -ENOMEM;
rcu_assign_pointer(*pp, p);
+ *pg = p;
if (state == MDB_TEMPORARY)
mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -450,7 +490,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
}
static int __br_mdb_add(struct net *net, struct net_bridge *br,
- struct br_mdb_entry *entry)
+ struct br_mdb_entry *entry,
+ struct net_bridge_port_group **pg)
{
struct br_ip ip;
struct net_device *dev;
@@ -479,7 +520,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
#endif
spin_lock_bh(&br->multicast_lock);
- ret = br_mdb_add_group(br, p, &ip, entry->state);
+ ret = br_mdb_add_group(br, p, &ip, entry->state, pg);
spin_unlock_bh(&br->multicast_lock);
return ret;
}
@@ -487,6 +528,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
+ struct net_bridge_port_group *pg;
struct net_bridge_vlan_group *vg;
struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
@@ -516,15 +558,15 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
if (br_vlan_enabled(br) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
- err = __br_mdb_add(net, br, entry);
+ err = __br_mdb_add(net, br, entry, &pg);
if (err)
break;
- __br_mdb_notify(dev, entry, RTM_NEWMDB);
+ __br_mdb_notify(dev, entry, RTM_NEWMDB, pg);
}
} else {
- err = __br_mdb_add(net, br, entry);
+ err = __br_mdb_add(net, br, entry, &pg);
if (!err)
- __br_mdb_notify(dev, entry, RTM_NEWMDB);
+ __br_mdb_notify(dev, entry, RTM_NEWMDB, pg);
}
return err;
@@ -568,7 +610,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (p->port->state == BR_STATE_DISABLED)
goto unlock;
- entry->state = p->state;
+ __mdb_entry_fill_flags(entry, p->flags);
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
@@ -620,12 +662,12 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
entry->vid = v->vid;
err = __br_mdb_del(br, entry);
if (!err)
- __br_mdb_notify(dev, entry, RTM_DELMDB);
+ __br_mdb_notify(dev, entry, RTM_DELMDB, NULL);
}
} else {
err = __br_mdb_del(br, entry);
if (!err)
- __br_mdb_notify(dev, entry, RTM_DELMDB);
+ __br_mdb_notify(dev, entry, RTM_DELMDB, NULL);
}
return err;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 03661d97463c..a4c15df2b792 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -283,8 +283,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
- br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
- p->state);
+ br_mdb_notify(br->dev, p, RTM_DELMDB);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
if (!mp->ports && !mp->mglist &&
@@ -304,7 +303,7 @@ static void br_multicast_port_group_expired(unsigned long data)
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
- hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT)
+ hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT)
goto out;
br_multicast_del_pg(br, pg);
@@ -649,7 +648,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
struct net_bridge_port *port,
struct br_ip *group,
struct net_bridge_port_group __rcu *next,
- unsigned char state)
+ unsigned char flags)
{
struct net_bridge_port_group *p;
@@ -659,7 +658,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
p->addr = *group;
p->port = port;
- p->state = state;
+ p->flags = flags;
rcu_assign_pointer(p->next, next);
hlist_add_head(&p->mglist, &port->mglist);
setup_timer(&p->timer, br_multicast_port_group_expired,
@@ -702,11 +701,11 @@ static int br_multicast_add_group(struct net_bridge *br,
break;
}
- p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY);
+ p = br_multicast_new_port_group(port, group, *pp, 0);
if (unlikely(!p))
goto err;
rcu_assign_pointer(*pp, p);
- br_mdb_notify(br->dev, port, group, RTM_NEWMDB, MDB_TEMPORARY);
+ br_mdb_notify(br->dev, p, RTM_NEWMDB);
found:
mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -760,13 +759,17 @@ static void br_multicast_router_expired(unsigned long data)
struct net_bridge *br = port->br;
spin_lock(&br->multicast_lock);
- if (port->multicast_router != 1 ||
+ if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ port->multicast_router == MDB_RTR_TYPE_PERM ||
timer_pending(&port->multicast_router_timer) ||
hlist_unhashed(&port->rlist))
goto out;
hlist_del_init_rcu(&port->rlist);
br_rtr_notify(br->dev, port, RTM_DELMDB);
+ /* Don't allow timer refresh if the router expired */
+ if (port->multicast_router == MDB_RTR_TYPE_TEMP)
+ port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
out:
spin_unlock(&br->multicast_lock);
@@ -913,7 +916,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data)
void br_multicast_add_port(struct net_bridge_port *port)
{
- port->multicast_router = 1;
+ port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
(unsigned long)port);
@@ -960,7 +963,8 @@ void br_multicast_enable_port(struct net_bridge_port *port)
#if IS_ENABLED(CONFIG_IPV6)
br_multicast_enable(&port->ip6_own_query);
#endif
- if (port->multicast_router == 2 && hlist_unhashed(&port->rlist))
+ if (port->multicast_router == MDB_RTR_TYPE_PERM &&
+ hlist_unhashed(&port->rlist))
br_multicast_add_router(br, port);
out:
@@ -975,12 +979,15 @@ void br_multicast_disable_port(struct net_bridge_port *port)
spin_lock(&br->multicast_lock);
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
- if (pg->state == MDB_TEMPORARY)
+ if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
br_multicast_del_pg(br, pg);
if (!hlist_unhashed(&port->rlist)) {
hlist_del_init_rcu(&port->rlist);
br_rtr_notify(br->dev, port, RTM_DELMDB);
+ /* Don't allow timer refresh if disabling */
+ if (port->multicast_router == MDB_RTR_TYPE_TEMP)
+ port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
}
del_timer(&port->multicast_router_timer);
del_timer(&port->ip4_own_query.timer);
@@ -1228,13 +1235,14 @@ static void br_multicast_mark_router(struct net_bridge *br,
unsigned long now = jiffies;
if (!port) {
- if (br->multicast_router == 1)
+ if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
mod_timer(&br->multicast_router_timer,
now + br->multicast_querier_interval);
return;
}
- if (port->multicast_router != 1)
+ if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ port->multicast_router == MDB_RTR_TYPE_PERM)
return;
br_multicast_add_router(br, port);
@@ -1453,8 +1461,7 @@ br_multicast_leave_group(struct net_bridge *br,
hlist_del_init(&p->mglist);
del_timer(&p->timer);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- br_mdb_notify(br->dev, port, group, RTM_DELMDB,
- p->state);
+ br_mdb_notify(br->dev, p, RTM_DELMDB);
if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
@@ -1715,7 +1722,7 @@ void br_multicast_init(struct net_bridge *br)
br->hash_elasticity = 4;
br->hash_max = 512;
- br->multicast_router = 1;
+ br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
br->multicast_querier = 0;
br->multicast_query_use_ifaddr = 0;
br->multicast_last_member_count = 2;
@@ -1825,11 +1832,11 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
spin_lock_bh(&br->multicast_lock);
switch (val) {
- case 0:
- case 2:
+ case MDB_RTR_TYPE_DISABLED:
+ case MDB_RTR_TYPE_PERM:
del_timer(&br->multicast_router_timer);
/* fall through */
- case 1:
+ case MDB_RTR_TYPE_TEMP_QUERY:
br->multicast_router = val;
err = 0;
break;
@@ -1840,37 +1847,53 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
return err;
}
+static void __del_port_router(struct net_bridge_port *p)
+{
+ if (hlist_unhashed(&p->rlist))
+ return;
+ hlist_del_init_rcu(&p->rlist);
+ br_rtr_notify(p->br->dev, p, RTM_DELMDB);
+}
+
int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
{
struct net_bridge *br = p->br;
+ unsigned long now = jiffies;
int err = -EINVAL;
spin_lock(&br->multicast_lock);
-
- switch (val) {
- case 0:
- case 1:
- case 2:
- p->multicast_router = val;
+ if (p->multicast_router == val) {
+ /* Refresh the temp router port timer */
+ if (p->multicast_router == MDB_RTR_TYPE_TEMP)
+ mod_timer(&p->multicast_router_timer,
+ now + br->multicast_querier_interval);
err = 0;
-
- if (val < 2 && !hlist_unhashed(&p->rlist)) {
- hlist_del_init_rcu(&p->rlist);
- br_rtr_notify(br->dev, p, RTM_DELMDB);
- }
-
- if (val == 1)
- break;
-
+ goto unlock;
+ }
+ switch (val) {
+ case MDB_RTR_TYPE_DISABLED:
+ p->multicast_router = MDB_RTR_TYPE_DISABLED;
+ __del_port_router(p);
+ del_timer(&p->multicast_router_timer);
+ break;
+ case MDB_RTR_TYPE_TEMP_QUERY:
+ p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ __del_port_router(p);
+ break;
+ case MDB_RTR_TYPE_PERM:
+ p->multicast_router = MDB_RTR_TYPE_PERM;
del_timer(&p->multicast_router_timer);
-
- if (val == 0)
- break;
-
br_multicast_add_router(br, p);
break;
+ case MDB_RTR_TYPE_TEMP:
+ p->multicast_router = MDB_RTR_TYPE_TEMP;
+ br_multicast_mark_router(br, p);
+ break;
+ default:
+ goto unlock;
}
-
+ err = 0;
+unlock:
spin_unlock(&br->multicast_lock);
return err;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 40197ff8918a..e9c635eae24d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -598,7 +598,6 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
return -ENETDOWN;
br_set_state(p, state);
- br_log_state(p);
br_port_state_selection(p->br);
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 216018c76018..1b5d145dfcbf 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -150,6 +150,9 @@ struct net_bridge_fdb_entry
struct rcu_head rcu;
};
+#define MDB_PG_FLAGS_PERMANENT BIT(0)
+#define MDB_PG_FLAGS_OFFLOAD BIT(1)
+
struct net_bridge_port_group {
struct net_bridge_port *port;
struct net_bridge_port_group __rcu *next;
@@ -157,7 +160,7 @@ struct net_bridge_port_group {
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
- unsigned char state;
+ unsigned char flags;
};
struct net_bridge_mdb_entry
@@ -554,11 +557,11 @@ void br_multicast_free_pg(struct rcu_head *head);
struct net_bridge_port_group *
br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
struct net_bridge_port_group __rcu *next,
- unsigned char state);
+ unsigned char flags);
void br_mdb_init(void);
void br_mdb_uninit(void);
-void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type, u8 state);
+void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg,
+ int type);
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
int type);
@@ -897,7 +900,6 @@ static inline void br_nf_core_fini(void) {}
#endif
/* br_stp.c */
-void br_log_state(const struct net_bridge_port *p);
void br_set_state(struct net_bridge_port *p, unsigned int state);
struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no);
void br_init_port(struct net_bridge_port *p);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b3cca126b103..c22816a0b1b1 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -30,13 +30,6 @@ static const char *const br_port_state_names[] = {
[BR_STATE_BLOCKING] = "blocking",
};
-void br_log_state(const struct net_bridge_port *p)
-{
- br_info(p->br, "port %u(%s) entered %s state\n",
- (unsigned int) p->port_no, p->dev->name,
- br_port_state_names[p->state]);
-}
-
void br_set_state(struct net_bridge_port *p, unsigned int state)
{
struct switchdev_attr attr = {
@@ -52,6 +45,10 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
+ else
+ br_info(p->br, "port %u(%s) entered %s state\n",
+ (unsigned int) p->port_no, p->dev->name,
+ br_port_state_names[p->state]);
}
/* called under bridge lock */
@@ -126,7 +123,6 @@ static void br_root_port_block(const struct net_bridge *br,
(unsigned int) p->port_no, p->dev->name);
br_set_state(p, BR_STATE_LISTENING);
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
if (br->forward_delay > 0)
@@ -407,7 +403,6 @@ static void br_make_blocking(struct net_bridge_port *p)
br_topology_change_detection(p->br);
br_set_state(p, BR_STATE_BLOCKING);
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
del_timer(&p->forward_delay_timer);
@@ -431,7 +426,6 @@ static void br_make_forwarding(struct net_bridge_port *p)
else
br_set_state(p, BR_STATE_LEARNING);
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
if (br->forward_delay != 0)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a31ac6ad76a2..984d46263007 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -102,7 +102,6 @@ void br_stp_enable_port(struct net_bridge_port *p)
{
br_init_port(p);
br_port_state_selection(p->br);
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
}
@@ -118,7 +117,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
p->topology_change_ack = 0;
p->config_pending = 0;
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
del_timer(&p->message_age_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 5f0f5af0ec35..da058b85aa22 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -98,7 +98,6 @@ static void br_forward_delay_timer_expired(unsigned long arg)
br_topology_change_detection(br);
netif_carrier_on(br->dev);
}
- br_log_state(p);
rcu_read_lock();
br_ifinfo_notify(RTM_NEWLINK, p);
rcu_read_unlock();
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 85e43af4af7a..9309bb4f2a5b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -955,6 +955,13 @@ err_rhtbl:
*/
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
+ struct switchdev_obj_port_vlan v = {
+ .obj.orig_dev = port->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .flags = flags,
+ .vid_begin = vid,
+ .vid_end = vid,
+ };
struct net_bridge_vlan *vlan;
int ret;
@@ -962,6 +969,10 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
+ /* Pass the flags to the hardware bridge */
+ ret = switchdev_port_obj_add(port->dev, &v.obj);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
__vlan_add_flags(vlan, flags);
return 0;
}
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index fdba3d9fbff3..adc8d7221dbb 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -48,6 +48,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _oth;
+ struct net *net = sock_net(oldskb->sk);
if (!nft_bridge_iphdr_validate(oldskb))
return;
@@ -63,9 +64,9 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
- sysctl_ip_default_ttl);
+ net->ipv4.sysctl_ip_default_ttl);
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
- niph->ttl = sysctl_ip_default_ttl;
+ niph->ttl = net->ipv4.sysctl_ip_default_ttl;
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
@@ -85,6 +86,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
void *payload;
__wsum csum;
u8 proto;
+ struct net *net = sock_net(oldskb->sk);
if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
return;
@@ -119,7 +121,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
- sysctl_ip_default_ttl);
+ net->ipv4.sysctl_ip_default_ttl);
skb_reset_transport_header(nskb);
icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 61d7617d9249..b82440e1fcb4 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -159,7 +159,7 @@ static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
tmppkt = NULL;
/* Verify that length is correct */
- err = EPROTO;
+ err = -EPROTO;
if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
goto out;
}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 10d87753ed87..9e43a315e662 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -152,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
void *ticket_buf = NULL;
void *tp, *tpend;
void **ptp;
- struct ceph_timespec new_validity;
struct ceph_crypto_key new_session_key;
struct ceph_buffer *new_ticket_blob;
unsigned long new_expires, new_renew_after;
@@ -193,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret)
goto out;
- ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
- ceph_decode_timespec(&validity, &new_validity);
+ ceph_decode_timespec(&validity, dp);
+ dp += sizeof(struct ceph_timespec);
new_expires = get_seconds() + validity.tv_sec;
new_renew_after = new_expires - (validity.tv_sec / 4);
dout(" expires=%lu renew_after=%lu\n", new_expires,
@@ -233,10 +232,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
ceph_buffer_put(th->ticket_blob);
th->session_key = new_session_key;
th->ticket_blob = new_ticket_blob;
- th->validity = new_validity;
th->secret_id = new_secret_id;
th->expires = new_expires;
th->renew_after = new_renew_after;
+ th->have_key = true;
dout(" got ticket service %d (%s) secret_id %lld len %d\n",
type, ceph_entity_type_name(type), th->secret_id,
(int)th->ticket_blob->vec.iov_len);
@@ -384,6 +383,24 @@ bad:
return -ERANGE;
}
+static bool need_key(struct ceph_x_ticket_handler *th)
+{
+ if (!th->have_key)
+ return true;
+
+ return get_seconds() >= th->renew_after;
+}
+
+static bool have_key(struct ceph_x_ticket_handler *th)
+{
+ if (th->have_key) {
+ if (get_seconds() >= th->expires)
+ th->have_key = false;
+ }
+
+ return th->have_key;
+}
+
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
{
int want = ac->want_keys;
@@ -402,20 +419,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
continue;
th = get_ticket_handler(ac, service);
-
if (IS_ERR(th)) {
*pneed |= service;
continue;
}
- if (get_seconds() >= th->renew_after)
+ if (need_key(th))
*pneed |= service;
- if (get_seconds() >= th->expires)
+ if (!have_key(th))
xi->have_keys &= ~service;
}
}
-
static int ceph_x_build_request(struct ceph_auth_client *ac,
void *buf, void *end)
{
@@ -667,14 +682,26 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
ac->private = NULL;
}
-static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
- int peer_type)
+static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
{
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th))
- memset(&th->validity, 0, sizeof(th->validity));
+ th->have_key = false;
+}
+
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+ int peer_type)
+{
+ /*
+ * We are to invalidate a service ticket in the hopes of
+ * getting a new, hopefully more valid, one. But, we won't get
+ * it unless our AUTH ticket is good, so invalidate AUTH ticket
+ * as well, just in case.
+ */
+ invalidate_ticket(ac, peer_type);
+ invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
}
static int calcu_signature(struct ceph_x_authorizer *au,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index e8b7c6917d47..40b1a3cf7397 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -16,7 +16,7 @@ struct ceph_x_ticket_handler {
unsigned int service;
struct ceph_crypto_key session_key;
- struct ceph_timespec validity;
+ bool have_key;
u64 secret_id;
struct ceph_buffer *ticket_blob;
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 393bfb22d5bb..5fcfb98f309e 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
* @local_retries: localized retries
* @local_fallback_retries: localized fallback retries
* @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @stable: stable mode starts rep=0 in the recursive call for all replicas
* @vary_r: pass r to recursive calls
* @out2: second output vector for leaf items (if @recurse_to_leaf)
* @parent_r: r value passed from the parent
@@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
unsigned int local_fallback_retries,
int recurse_to_leaf,
unsigned int vary_r,
+ unsigned int stable,
int *out2,
int parent_r)
{
@@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
int collide, reject;
int count = out_size;
- dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep,
tries, recurse_tries, local_retries, local_fallback_retries,
- parent_r);
+ parent_r, stable);
- for (rep = outpos; rep < numrep && count > 0 ; rep++) {
+ for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
@@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
if (crush_choose_firstn(map,
map->buckets[-1-item],
weight, weight_max,
- x, outpos+1, 0,
+ x, stable ? 1 : outpos+1, 0,
out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
0,
vary_r,
+ stable,
NULL,
sub_r) <= outpos)
/* didn't get leaf */
@@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
int choose_local_fallback_retries = map->choose_local_fallback_tries;
int vary_r = map->chooseleaf_vary_r;
+ int stable = map->chooseleaf_stable;
if ((__u32)ruleno >= map->max_rules) {
dprintk(" bad ruleno %d\n", ruleno);
@@ -835,7 +839,8 @@ int crush_do_rule(const struct crush_map *map,
case CRUSH_RULE_TAKE:
if ((curstep->arg1 >= 0 &&
curstep->arg1 < map->max_devices) ||
- (-1-curstep->arg1 < map->max_buckets &&
+ (-1-curstep->arg1 >= 0 &&
+ -1-curstep->arg1 < map->max_buckets &&
map->buckets[-1-curstep->arg1])) {
w[0] = curstep->arg1;
wsize = 1;
@@ -869,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
vary_r = curstep->arg1;
break;
+ case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
+ if (curstep->arg1 >= 0)
+ stable = curstep->arg1;
+ break;
+
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
case CRUSH_RULE_CHOOSE_FIRSTN:
firstn = 1;
@@ -888,6 +898,7 @@ int crush_do_rule(const struct crush_map *map,
osize = 0;
for (i = 0; i < wsize; i++) {
+ int bno;
/*
* see CRUSH_N, CRUSH_N_MINUS macros.
* basically, numrep <= 0 means relative to
@@ -900,6 +911,13 @@ int crush_do_rule(const struct crush_map *map,
continue;
}
j = 0;
+ /* make sure bucket id is valid */
+ bno = -1 - w[i];
+ if (bno < 0 || bno >= map->max_buckets) {
+ /* w[i] is probably CRUSH_ITEM_NONE */
+ dprintk(" bad w[i] %d\n", w[i]);
+ continue;
+ }
if (firstn) {
int recurse_tries;
if (choose_leaf_tries)
@@ -911,7 +929,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_tries = choose_tries;
osize += crush_choose_firstn(
map,
- map->buckets[-1-w[i]],
+ map->buckets[bno],
weight, weight_max,
x, numrep,
curstep->arg2,
@@ -923,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
choose_local_fallback_retries,
recurse_to_leaf,
vary_r,
+ stable,
c+osize,
0);
} else {
@@ -930,7 +949,7 @@ int crush_do_rule(const struct crush_map *map,
numrep : (result_max-osize));
crush_choose_indep(
map,
- map->buckets[-1-w[i]],
+ map->buckets[bno],
weight, weight_max,
x, out_size, numrep,
curstep->arg2,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9981039ef4ff..9cfedf565f5b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -23,9 +23,6 @@
#include <linux/ceph/pagelist.h>
#include <linux/export.h>
-#define list_entry_next(pos, member) \
- list_entry(pos->member.next, typeof(*pos), member)
-
/*
* Ceph uses the messenger to exchange ceph_msg messages with other
* hosts in the system. The messenger provides ordered and reliable
@@ -672,6 +669,8 @@ static void reset_connection(struct ceph_connection *con)
}
con->in_seq = 0;
con->in_seq_acked = 0;
+
+ con->out_skip = 0;
}
/*
@@ -771,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
static void con_out_kvec_reset(struct ceph_connection *con)
{
+ BUG_ON(con->out_skip);
+
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
@@ -779,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
- int index;
+ int index = con->out_kvec_left;
- index = con->out_kvec_left;
+ BUG_ON(con->out_skip);
BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
con->out_kvec[index].iov_len = size;
@@ -790,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}
+/*
+ * Chop off a kvec from the end. Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+ int off = con->out_kvec_cur - con->out_kvec;
+ int skip = 0;
+
+ if (con->out_kvec_bytes > 0) {
+ skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+ BUG_ON(con->out_kvec_bytes < skip);
+ BUG_ON(!con->out_kvec_left);
+ con->out_kvec_bytes -= skip;
+ con->out_kvec_left--;
+ }
+
+ return skip;
+}
+
#ifdef CONFIG_BLOCK
/*
@@ -1042,7 +1064,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
/* Move on to the next page */
BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
- cursor->page = list_entry_next(cursor->page, lru);
+ cursor->page = list_next_entry(cursor->page, lru);
cursor->last_piece = cursor->resid <= PAGE_SIZE;
return true;
@@ -1166,7 +1188,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
if (!cursor->resid && cursor->total_resid) {
WARN_ON(!cursor->last_piece);
BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
- cursor->data = list_entry_next(cursor->data, links);
+ cursor->data = list_next_entry(cursor->data, links);
__ceph_msg_data_cursor_init(cursor);
new_piece = true;
}
@@ -1197,7 +1219,6 @@ static void prepare_write_message_footer(struct ceph_connection *con)
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con);
- con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
@@ -1225,7 +1246,6 @@ static void prepare_write_message(struct ceph_connection *con)
u32 crc;
con_out_kvec_reset(con);
- con->out_kvec_is_msg = true;
con->out_msg_done = false;
/* Sneak an ack in there first? If we can get it into the same
@@ -1265,18 +1285,19 @@ static void prepare_write_message(struct ceph_connection *con)
/* tag + hdr + front + middle */
con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
- /* fill in crc (except data pages), footer */
+ /* fill in hdr crc and finalize hdr */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = 0;
+ memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
+ /* fill in front and middle crc, footer */
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
if (m->middle) {
@@ -1288,6 +1309,7 @@ static void prepare_write_message(struct ceph_connection *con)
dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
+ con->out_msg->footer.flags = 0;
/* is there a data payload? */
con->out_msg->footer.data_crc = 0;
@@ -1492,7 +1514,6 @@ static int write_partial_kvec(struct ceph_connection *con)
}
}
con->out_kvec_left = 0;
- con->out_kvec_is_msg = false;
ret = 1;
out:
dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
@@ -1584,6 +1605,7 @@ static int write_partial_skip(struct ceph_connection *con)
{
int ret;
+ dout("%s %p %d left\n", __func__, con, con->out_skip);
while (con->out_skip > 0) {
size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
@@ -2506,13 +2528,13 @@ more:
more_kvec:
/* kvec data queued? */
- if (con->out_skip) {
- ret = write_partial_skip(con);
+ if (con->out_kvec_left) {
+ ret = write_partial_kvec(con);
if (ret <= 0)
goto out;
}
- if (con->out_kvec_left) {
- ret = write_partial_kvec(con);
+ if (con->out_skip) {
+ ret = write_partial_skip(con);
if (ret <= 0)
goto out;
}
@@ -2805,13 +2827,17 @@ static bool con_backoff(struct ceph_connection *con)
static void con_fault_finish(struct ceph_connection *con)
{
+ dout("%s %p\n", __func__, con);
+
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
*/
- if (con->auth_retry && con->ops->invalidate_authorizer) {
- dout("calling invalidate_authorizer()\n");
- con->ops->invalidate_authorizer(con);
+ if (con->auth_retry) {
+ dout("auth_retry %d, invalidating\n", con->auth_retry);
+ if (con->ops->invalidate_authorizer)
+ con->ops->invalidate_authorizer(con);
+ con->auth_retry = 0;
}
if (con->ops->fault)
@@ -3050,16 +3076,31 @@ void ceph_msg_revoke(struct ceph_msg *msg)
ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("%s %p msg %p - was sending\n", __func__, con, msg);
- con->out_msg = NULL;
- if (con->out_kvec_is_msg) {
- con->out_skip = con->out_kvec_bytes;
- con->out_kvec_is_msg = false;
+ BUG_ON(con->out_skip);
+ /* footer */
+ if (con->out_msg_done) {
+ con->out_skip += con_out_kvec_skip(con);
+ } else {
+ BUG_ON(!msg->data_length);
+ if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+ con->out_skip += sizeof(msg->footer);
+ else
+ con->out_skip += sizeof(msg->old_footer);
}
+ /* data, middle, front */
+ if (msg->data_length)
+ con->out_skip += msg->cursor.total_resid;
+ if (msg->middle)
+ con->out_skip += con_out_kvec_skip(con);
+ con->out_skip += con_out_kvec_skip(con);
+
+ dout("%s %p msg %p - was sending, will write %d skip %d\n",
+ __func__, con, msg, con->out_kvec_bytes, con->out_skip);
msg->hdr.seq = 0;
-
+ con->out_msg = NULL;
ceph_msg_put(msg);
}
+
mutex_unlock(&con->mutex);
}
@@ -3361,9 +3402,7 @@ static void ceph_msg_free(struct ceph_msg *m)
static void ceph_msg_release(struct kref *kref)
{
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
- LIST_HEAD(data);
- struct list_head *links;
- struct list_head *next;
+ struct ceph_msg_data *data, *next;
dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
@@ -3376,12 +3415,8 @@ static void ceph_msg_release(struct kref *kref)
m->middle = NULL;
}
- list_splice_init(&m->data, &data);
- list_for_each_safe(links, next, &data) {
- struct ceph_msg_data *data;
-
- data = list_entry(links, struct ceph_msg_data, links);
- list_del_init(links);
+ list_for_each_entry_safe(data, next, &m->data, links) {
+ list_del_init(&data->links);
ceph_msg_data_destroy(data);
}
m->data_length = 0;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index edda01626a45..de85dddc3dc0 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc)
return monc->client->have_fsid && monc->auth->global_id > 0;
}
-/*
- * The monitor responds with mount ack indicate mount success. The
- * included client ticket allows the client to talk to MDSs and OSDs.
- */
static void ceph_monc_handle_map(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f8f235930d88..3534e12683d3 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1770,6 +1770,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
u32 osdmap_epoch;
int already_completed;
u32 bytes;
+ u8 decode_redir;
unsigned int i;
tid = le64_to_cpu(msg->hdr.tid);
@@ -1841,6 +1842,15 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
p += 8 + 4; /* skip replay_version */
p += 8; /* skip user_version */
+ if (le16_to_cpu(msg->hdr.version) >= 7)
+ ceph_decode_8_safe(&p, end, decode_redir, bad_put);
+ else
+ decode_redir = 1;
+ } else {
+ decode_redir = 0;
+ }
+
+ if (decode_redir) {
err = ceph_redirect_decode(&p, end, &redir);
if (err)
goto bad_put;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 7d8f581d9f1f..243574c8cf33 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -342,23 +342,32 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
c->choose_local_tries = ceph_decode_32(p);
c->choose_local_fallback_tries = ceph_decode_32(p);
c->choose_total_tries = ceph_decode_32(p);
- dout("crush decode tunable choose_local_tries = %d",
+ dout("crush decode tunable choose_local_tries = %d\n",
c->choose_local_tries);
- dout("crush decode tunable choose_local_fallback_tries = %d",
+ dout("crush decode tunable choose_local_fallback_tries = %d\n",
c->choose_local_fallback_tries);
- dout("crush decode tunable choose_total_tries = %d",
+ dout("crush decode tunable choose_total_tries = %d\n",
c->choose_total_tries);
ceph_decode_need(p, end, sizeof(u32), done);
c->chooseleaf_descend_once = ceph_decode_32(p);
- dout("crush decode tunable chooseleaf_descend_once = %d",
+ dout("crush decode tunable chooseleaf_descend_once = %d\n",
c->chooseleaf_descend_once);
ceph_decode_need(p, end, sizeof(u8), done);
c->chooseleaf_vary_r = ceph_decode_8(p);
- dout("crush decode tunable chooseleaf_vary_r = %d",
+ dout("crush decode tunable chooseleaf_vary_r = %d\n",
c->chooseleaf_vary_r);
+ /* skip straw_calc_version, allowed_bucket_algs */
+ ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
+ *p += sizeof(u8) + sizeof(u32);
+
+ ceph_decode_need(p, end, sizeof(u8), done);
+ c->chooseleaf_stable = ceph_decode_8(p);
+ dout("crush decode tunable chooseleaf_stable = %d\n",
+ c->chooseleaf_stable);
+
done:
dout("crush_decode success\n");
return c;
diff --git a/net/core/Makefile b/net/core/Makefile
index 0b835de04de3..014422e2561f 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,3 +24,5 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
+obj-$(CONFIG_DST_CACHE) += dst_cache.o
+obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ca95d5d7af0..edb7179bc051 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2695,6 +2695,8 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
*
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
+ *
+ * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb.
*/
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path)
@@ -2709,6 +2711,9 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
return ERR_PTR(err);
}
+ BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
+ sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
+
SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
SKB_GSO_CB(skb)->encap_level = 0;
@@ -3824,8 +3829,14 @@ static void net_tx_action(struct softirq_action *h)
trace_consume_skb(skb);
else
trace_kfree_skb(skb, net_tx_action);
- __kfree_skb(skb);
+
+ if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
+ __kfree_skb(skb);
+ else
+ __kfree_skb_defer(skb);
}
+
+ __kfree_skb_flush();
}
if (sd->output_queue) {
@@ -4149,7 +4160,10 @@ ncls:
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
drop:
- atomic_long_inc(&skb->dev->rx_dropped);
+ if (!deliver_exact)
+ atomic_long_inc(&skb->dev->rx_dropped);
+ else
+ atomic_long_inc(&skb->dev->rx_nohandler);
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
@@ -4346,6 +4360,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= skb_metadata_dst_cmp(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
@@ -4543,10 +4558,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
+ skb_dst_drop(skb);
kmem_cache_free(skbuff_head_cache, skb);
- else
+ } else {
__kfree_skb(skb);
+ }
break;
case GRO_HELD:
@@ -5144,6 +5161,7 @@ static void net_rx_action(struct softirq_action *h)
}
}
+ __kfree_skb_flush();
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
@@ -5371,12 +5389,12 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
{
struct netdev_adjacent *lower;
- lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+ lower = list_entry(*iter, struct netdev_adjacent, list);
if (&lower->list == &dev->adj_list.lower)
return NULL;
- *iter = &lower->list;
+ *iter = lower->list.next;
return lower->dev;
}
@@ -7245,24 +7263,31 @@ void netdev_run_todo(void)
}
}
-/* Convert net_device_stats to rtnl_link_stats64. They have the same
- * fields in the same order, with only the type differing.
+/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
+ * all the same fields in the same order as net_device_stats, with only
+ * the type differing, but rtnl_link_stats64 may have additional fields
+ * at the end for newer counters.
*/
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
const struct net_device_stats *netdev_stats)
{
#if BITS_PER_LONG == 64
- BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
+ BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
memcpy(stats64, netdev_stats, sizeof(*stats64));
+ /* zero out counters that only exist in rtnl_link_stats64 */
+ memset((char *)stats64 + sizeof(*netdev_stats), 0,
+ sizeof(*stats64) - sizeof(*netdev_stats));
#else
- size_t i, n = sizeof(*stats64) / sizeof(u64);
+ size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
const unsigned long *src = (const unsigned long *)netdev_stats;
u64 *dst = (u64 *)stats64;
- BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
- sizeof(*stats64) / sizeof(u64));
+ BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
for (i = 0; i < n; i++)
dst[i] = src[i];
+ /* zero out counters that only exist in rtnl_link_stats64 */
+ memset((char *)stats64 + n * sizeof(u64), 0,
+ sizeof(*stats64) - n * sizeof(u64));
#endif
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
@@ -7292,6 +7317,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
}
storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
+ storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
@@ -7414,8 +7440,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
- if (!dev->tx_queue_len)
+ if (!dev->tx_queue_len) {
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->tx_queue_len = 1;
+ }
dev->num_tx_queues = txqs;
dev->real_num_tx_queues = txqs;
diff --git a/net/core/devlink.c b/net/core/devlink.c
new file mode 100644
index 000000000000..590fa561cb7f
--- /dev/null
+++ b/net/core/devlink.c
@@ -0,0 +1,738 @@
+/*
+ * net/core/devlink.c - Network physical/parent device Netlink interface
+ *
+ * Heavily inspired by net/wireless/
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <rdma/ib_verbs.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/devlink.h>
+
+static LIST_HEAD(devlink_list);
+
+/* devlink_mutex
+ *
+ * An overall lock guarding every operation coming from userspace.
+ * It also guards devlink devices list and it is taken when
+ * driver registers/unregisters it.
+ */
+static DEFINE_MUTEX(devlink_mutex);
+
+/* devlink_port_mutex
+ *
+ * Shared lock to guard lists of ports in all devlink devices.
+ */
+static DEFINE_MUTEX(devlink_port_mutex);
+
+static struct net *devlink_net(const struct devlink *devlink)
+{
+ return read_pnet(&devlink->_net);
+}
+
+static void devlink_net_set(struct devlink *devlink, struct net *net)
+{
+ write_pnet(&devlink->_net, net);
+}
+
+static struct devlink *devlink_get_from_attrs(struct net *net,
+ struct nlattr **attrs)
+{
+ struct devlink *devlink;
+ char *busname;
+ char *devname;
+
+ if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME])
+ return ERR_PTR(-EINVAL);
+
+ busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
+ devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
+
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (strcmp(devlink->dev->bus->name, busname) == 0 &&
+ strcmp(dev_name(devlink->dev), devname) == 0 &&
+ net_eq(devlink_net(devlink), net))
+ return devlink;
+ }
+
+ return ERR_PTR(-ENODEV);
+}
+
+static struct devlink *devlink_get_from_info(struct genl_info *info)
+{
+ return devlink_get_from_attrs(genl_info_net(info), info->attrs);
+}
+
+static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
+ int port_index)
+{
+ struct devlink_port *devlink_port;
+
+ list_for_each_entry(devlink_port, &devlink->port_list, list) {
+ if (devlink_port->index == port_index)
+ return devlink_port;
+ }
+ return NULL;
+}
+
+static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
+{
+ return devlink_port_get_by_index(devlink, port_index);
+}
+
+static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_by_index(devlink, port_index);
+ if (!devlink_port)
+ return ERR_PTR(-ENODEV);
+ return devlink_port;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ return devlink_port_get_from_attrs(devlink, info->attrs);
+}
+
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+
+static int devlink_nl_pre_doit(const struct genl_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink;
+
+ mutex_lock(&devlink_mutex);
+ devlink = devlink_get_from_info(info);
+ if (IS_ERR(devlink)) {
+ mutex_unlock(&devlink_mutex);
+ return PTR_ERR(devlink);
+ }
+ info->user_ptr[0] = devlink;
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+ struct devlink_port *devlink_port;
+
+ mutex_lock(&devlink_port_mutex);
+ devlink_port = devlink_port_get_from_info(devlink, info);
+ if (IS_ERR(devlink_port)) {
+ mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink_mutex);
+ return PTR_ERR(devlink_port);
+ }
+ info->user_ptr[1] = devlink_port;
+ }
+ return 0;
+}
+
+static void devlink_nl_post_doit(const struct genl_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
+ mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink_mutex);
+}
+
+static struct genl_family devlink_nl_family = {
+ .id = GENL_ID_GENERATE,
+ .name = DEVLINK_GENL_NAME,
+ .version = DEVLINK_GENL_VERSION,
+ .maxattr = DEVLINK_ATTR_MAX,
+ .netnsok = true,
+ .pre_doit = devlink_nl_pre_doit,
+ .post_doit = devlink_nl_post_doit,
+};
+
+enum devlink_multicast_groups {
+ DEVLINK_MCGRP_CONFIG,
+};
+
+static const struct genl_multicast_group devlink_nl_mcgrps[] = {
+ [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
+};
+
+static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+ if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
+ goto nla_put_failure;
+ if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
+ devlink_port->desired_type))
+ goto nla_put_failure;
+ if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+ struct net_device *netdev = devlink_port->type_dev;
+
+ if (netdev &&
+ (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
+ netdev->ifindex) ||
+ nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
+ netdev->name)))
+ goto nla_put_failure;
+ }
+ if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
+ struct ib_device *ibdev = devlink_port->type_dev;
+
+ if (ibdev &&
+ nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
+ ibdev->name))
+ goto nla_put_failure;
+ }
+ if (devlink_port->split &&
+ nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+ devlink_port->split_group))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_port_notify(struct devlink_port *devlink_port,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink_port->registered)
+ return;
+
+ WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ if (err)
+ goto out;
+ idx++;
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_port_fill(msg, devlink, devlink_port,
+ DEVLINK_CMD_PORT_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink *devlink;
+ struct devlink_port *devlink_port;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink_port_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ list_for_each_entry(devlink_port, &devlink->port_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_port_fill(msg, devlink, devlink_port,
+ DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err)
+ goto out;
+ idx++;
+ }
+ }
+out:
+ mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_port_type_set(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ enum devlink_port_type port_type)
+
+{
+ int err;
+
+ if (devlink->ops && devlink->ops->port_type_set) {
+ if (port_type == DEVLINK_PORT_TYPE_NOTSET)
+ return -EINVAL;
+ err = devlink->ops->port_type_set(devlink_port, port_type);
+ if (err)
+ return err;
+ devlink_port->desired_type = port_type;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ int err;
+
+ if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
+ enum devlink_port_type port_type;
+
+ port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
+ err = devlink_port_type_set(devlink, devlink_port, port_type);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int devlink_port_split(struct devlink *devlink,
+ u32 port_index, u32 count)
+
+{
+ if (devlink->ops && devlink->ops->port_split)
+ return devlink->ops->port_split(devlink, port_index, count);
+ return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ u32 port_index;
+ u32 count;
+
+ if (!info->attrs[DEVLINK_ATTR_PORT_INDEX] ||
+ !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
+ return -EINVAL;
+
+ port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+ count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
+ return devlink_port_split(devlink, port_index, count);
+}
+
+static int devlink_port_unsplit(struct devlink *devlink, u32 port_index)
+
+{
+ if (devlink->ops && devlink->ops->port_unsplit)
+ return devlink->ops->port_unsplit(devlink, port_index);
+ return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ u32 port_index;
+
+ if (!info->attrs[DEVLINK_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+ return devlink_port_unsplit(devlink, port_index);
+}
+
+static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
+};
+
+static const struct genl_ops devlink_nl_ops[] = {
+ {
+ .cmd = DEVLINK_CMD_GET,
+ .doit = devlink_nl_cmd_get_doit,
+ .dumpit = devlink_nl_cmd_get_dumpit,
+ .policy = devlink_nl_policy,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_GET,
+ .doit = devlink_nl_cmd_port_get_doit,
+ .dumpit = devlink_nl_cmd_port_get_dumpit,
+ .policy = devlink_nl_policy,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_SET,
+ .doit = devlink_nl_cmd_port_set_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_SPLIT,
+ .doit = devlink_nl_cmd_port_split_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_UNSPLIT,
+ .doit = devlink_nl_cmd_port_unsplit_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+/**
+ * devlink_alloc - Allocate new devlink instance resources
+ *
+ * @ops: ops
+ * @priv_size: size of user private data
+ *
+ * Allocate new devlink instance resources, including devlink index
+ * and name.
+ */
+struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
+{
+ struct devlink *devlink;
+
+ devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
+ if (!devlink)
+ return NULL;
+ devlink->ops = ops;
+ devlink_net_set(devlink, &init_net);
+ INIT_LIST_HEAD(&devlink->port_list);
+ return devlink;
+}
+EXPORT_SYMBOL_GPL(devlink_alloc);
+
+/**
+ * devlink_register - Register devlink instance
+ *
+ * @devlink: devlink
+ */
+int devlink_register(struct devlink *devlink, struct device *dev)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dev = dev;
+ list_add_tail(&devlink->list, &devlink_list);
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_register);
+
+/**
+ * devlink_unregister - Unregister devlink instance
+ *
+ * @devlink: devlink
+ */
+void devlink_unregister(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+ list_del(&devlink->list);
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_unregister);
+
+/**
+ * devlink_free - Free devlink instance resources
+ *
+ * @devlink: devlink
+ */
+void devlink_free(struct devlink *devlink)
+{
+ kfree(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_free);
+
+/**
+ * devlink_port_register - Register devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ * @port_index
+ *
+ * Register devlink port with provided port index. User can use
+ * any indexing, even hw-related one. devlink_port structure
+ * is convenient to be embedded inside user driver private structure.
+ * Note that the caller should take care of zeroing the devlink_port
+ * structure.
+ */
+int devlink_port_register(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index)
+{
+ mutex_lock(&devlink_port_mutex);
+ if (devlink_port_index_exists(devlink, port_index)) {
+ mutex_unlock(&devlink_port_mutex);
+ return -EEXIST;
+ }
+ devlink_port->devlink = devlink;
+ devlink_port->index = port_index;
+ devlink_port->type = DEVLINK_PORT_TYPE_NOTSET;
+ devlink_port->registered = true;
+ list_add_tail(&devlink_port->list, &devlink->port_list);
+ mutex_unlock(&devlink_port_mutex);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_port_register);
+
+/**
+ * devlink_port_unregister - Unregister devlink port
+ *
+ * @devlink_port: devlink port
+ */
+void devlink_port_unregister(struct devlink_port *devlink_port)
+{
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+ mutex_lock(&devlink_port_mutex);
+ list_del(&devlink_port->list);
+ mutex_unlock(&devlink_port_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_port_unregister);
+
+static void __devlink_port_type_set(struct devlink_port *devlink_port,
+ enum devlink_port_type type,
+ void *type_dev)
+{
+ devlink_port->type = type;
+ devlink_port->type_dev = type_dev;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+
+/**
+ * devlink_port_type_eth_set - Set port type to Ethernet
+ *
+ * @devlink_port: devlink port
+ * @netdev: related netdevice
+ */
+void devlink_port_type_eth_set(struct devlink_port *devlink_port,
+ struct net_device *netdev)
+{
+ return __devlink_port_type_set(devlink_port,
+ DEVLINK_PORT_TYPE_ETH, netdev);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
+
+/**
+ * devlink_port_type_ib_set - Set port type to InfiniBand
+ *
+ * @devlink_port: devlink port
+ * @ibdev: related IB device
+ */
+void devlink_port_type_ib_set(struct devlink_port *devlink_port,
+ struct ib_device *ibdev)
+{
+ return __devlink_port_type_set(devlink_port,
+ DEVLINK_PORT_TYPE_IB, ibdev);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
+
+/**
+ * devlink_port_type_clear - Clear port type
+ *
+ * @devlink_port: devlink port
+ */
+void devlink_port_type_clear(struct devlink_port *devlink_port)
+{
+ return __devlink_port_type_set(devlink_port,
+ DEVLINK_PORT_TYPE_NOTSET, NULL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_clear);
+
+/**
+ * devlink_port_split_set - Set port is split
+ *
+ * @devlink_port: devlink port
+ * @split_group: split group - identifies group split port is part of
+ */
+void devlink_port_split_set(struct devlink_port *devlink_port,
+ u32 split_group)
+{
+ devlink_port->split = true;
+ devlink_port->split_group = split_group;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+EXPORT_SYMBOL_GPL(devlink_port_split_set);
+
+static int __init devlink_module_init(void)
+{
+ return genl_register_family_with_ops_groups(&devlink_nl_family,
+ devlink_nl_ops,
+ devlink_nl_mcgrps);
+}
+
+static void __exit devlink_module_exit(void)
+{
+ genl_unregister_family(&devlink_nl_family);
+}
+
+module_init(devlink_module_init);
+module_exit(devlink_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Network physical device Netlink interface");
+MODULE_ALIAS_GENL_FAMILY(DEVLINK_GENL_NAME);
diff --git a/net/core/dst.c b/net/core/dst.c
index a1656e3b8d72..b5cbbe07f786 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -265,7 +265,7 @@ again:
lwtstate_put(dst->lwtstate);
if (dst->flags & DST_METADATA)
- kfree(dst);
+ metadata_dst_free((struct metadata_dst *)dst);
else
kmem_cache_free(dst->ops->kmem_cachep, dst);
@@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc);
+void metadata_dst_free(struct metadata_dst *md_dst)
+{
+#ifdef CONFIG_DST_CACHE
+ dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
+#endif
+ kfree(md_dst);
+}
+
struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
{
int cpu;
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
new file mode 100644
index 000000000000..3938f3f38d69
--- /dev/null
+++ b/net/core/dst_cache.c
@@ -0,0 +1,168 @@
+/*
+ * net/core/dst_cache.c - dst entry cache
+ *
+ * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <net/dst_cache.h>
+#include <net/route.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_fib.h>
+#endif
+#include <uapi/linux/in.h>
+
+struct dst_cache_pcpu {
+ unsigned long refresh_ts;
+ struct dst_entry *dst;
+ u32 cookie;
+ union {
+ struct in_addr in_saddr;
+ struct in6_addr in6_saddr;
+ };
+};
+
+void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
+ struct dst_entry *dst, u32 cookie)
+{
+ dst_release(dst_cache->dst);
+ if (dst)
+ dst_hold(dst);
+
+ dst_cache->cookie = cookie;
+ dst_cache->dst = dst;
+}
+
+struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
+ struct dst_cache_pcpu *idst)
+{
+ struct dst_entry *dst;
+
+ dst = idst->dst;
+ if (!dst)
+ goto fail;
+
+ /* the cache already hold a dst reference; it can't go away */
+ dst_hold(dst);
+
+ if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
+ (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
+ dst_cache_per_cpu_dst_set(idst, NULL, 0);
+ dst_release(dst);
+ goto fail;
+ }
+ return dst;
+
+fail:
+ idst->refresh_ts = jiffies;
+ return NULL;
+}
+
+struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
+{
+ if (!dst_cache->cache)
+ return NULL;
+
+ return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
+}
+EXPORT_SYMBOL_GPL(dst_cache_get);
+
+struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
+{
+ struct dst_cache_pcpu *idst;
+ struct dst_entry *dst;
+
+ if (!dst_cache->cache)
+ return NULL;
+
+ idst = this_cpu_ptr(dst_cache->cache);
+ dst = dst_cache_per_cpu_get(dst_cache, idst);
+ if (!dst)
+ return NULL;
+
+ *saddr = idst->in_saddr.s_addr;
+ return container_of(dst, struct rtable, dst);
+}
+EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
+
+void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
+ __be32 saddr)
+{
+ struct dst_cache_pcpu *idst;
+
+ if (!dst_cache->cache)
+ return;
+
+ idst = this_cpu_ptr(dst_cache->cache);
+ dst_cache_per_cpu_dst_set(idst, dst, 0);
+ idst->in_saddr.s_addr = saddr;
+}
+EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
+
+#if IS_ENABLED(CONFIG_IPV6)
+void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
+ const struct in6_addr *addr)
+{
+ struct dst_cache_pcpu *idst;
+
+ if (!dst_cache->cache)
+ return;
+
+ idst = this_cpu_ptr(dst_cache->cache);
+ dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
+ rt6_get_cookie((struct rt6_info *)dst));
+ idst->in6_saddr = *addr;
+}
+EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
+
+struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
+ struct in6_addr *saddr)
+{
+ struct dst_cache_pcpu *idst;
+ struct dst_entry *dst;
+
+ if (!dst_cache->cache)
+ return NULL;
+
+ idst = this_cpu_ptr(dst_cache->cache);
+ dst = dst_cache_per_cpu_get(dst_cache, idst);
+ if (!dst)
+ return NULL;
+
+ *saddr = idst->in6_saddr;
+ return dst;
+}
+EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
+#endif
+
+int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
+{
+ dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
+ gfp | __GFP_ZERO);
+ if (!dst_cache->cache)
+ return -ENOMEM;
+
+ dst_cache_reset(dst_cache);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dst_cache_init);
+
+void dst_cache_destroy(struct dst_cache *dst_cache)
+{
+ int i;
+
+ if (!dst_cache->cache)
+ return;
+
+ for_each_possible_cpu(i)
+ dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
+
+ free_percpu(dst_cache->cache);
+}
+EXPORT_SYMBOL_GPL(dst_cache_destroy);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index daf04709dd3c..2966cd0d7c93 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
[NETIF_F_BUSY_POLL_BIT] = "busy-poll",
+ [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
};
static const char
@@ -386,43 +387,461 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
return 0;
}
-int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32)
{
+ bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ dst[0] = legacy_u32;
+}
+
+/* return false if src had higher bits set. lower bits always updated. */
+static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32,
+ const unsigned long *src)
+{
+ bool retval = true;
+
+ /* TODO: following test will soon always be true */
+ if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(ext);
+
+ bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_fill(ext, 32);
+ bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ if (bitmap_intersects(ext, src,
+ __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ /* src mask goes beyond bit 31 */
+ retval = false;
+ }
+ }
+ *legacy_u32 = src[0];
+ return retval;
+}
+
+/* return false if legacy contained non-0 deprecated fields
+ * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
+ */
+static bool
+convert_legacy_settings_to_link_ksettings(
+ struct ethtool_link_ksettings *link_ksettings,
+ const struct ethtool_cmd *legacy_settings)
+{
+ bool retval = true;
+
+ memset(link_ksettings, 0, sizeof(*link_ksettings));
+
+ /* This is used to tell users that driver is still using these
+ * deprecated legacy fields, and they should not use
+ * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
+ */
+ if (legacy_settings->transceiver ||
+ legacy_settings->maxtxpkt ||
+ legacy_settings->maxrxpkt)
+ retval = false;
+
+ convert_legacy_u32_to_link_mode(
+ link_ksettings->link_modes.supported,
+ legacy_settings->supported);
+ convert_legacy_u32_to_link_mode(
+ link_ksettings->link_modes.advertising,
+ legacy_settings->advertising);
+ convert_legacy_u32_to_link_mode(
+ link_ksettings->link_modes.lp_advertising,
+ legacy_settings->lp_advertising);
+ link_ksettings->base.speed
+ = ethtool_cmd_speed(legacy_settings);
+ link_ksettings->base.duplex
+ = legacy_settings->duplex;
+ link_ksettings->base.port
+ = legacy_settings->port;
+ link_ksettings->base.phy_address
+ = legacy_settings->phy_address;
+ link_ksettings->base.autoneg
+ = legacy_settings->autoneg;
+ link_ksettings->base.mdio_support
+ = legacy_settings->mdio_support;
+ link_ksettings->base.eth_tp_mdix
+ = legacy_settings->eth_tp_mdix;
+ link_ksettings->base.eth_tp_mdix_ctrl
+ = legacy_settings->eth_tp_mdix_ctrl;
+ return retval;
+}
+
+/* return false if ksettings link modes had higher bits
+ * set. legacy_settings always updated (best effort)
+ */
+static bool
+convert_link_ksettings_to_legacy_settings(
+ struct ethtool_cmd *legacy_settings,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ bool retval = true;
+
+ memset(legacy_settings, 0, sizeof(*legacy_settings));
+ /* this also clears the deprecated fields in legacy structure:
+ * __u8 transceiver;
+ * __u32 maxtxpkt;
+ * __u32 maxrxpkt;
+ */
+
+ retval &= convert_link_mode_to_legacy_u32(
+ &legacy_settings->supported,
+ link_ksettings->link_modes.supported);
+ retval &= convert_link_mode_to_legacy_u32(
+ &legacy_settings->advertising,
+ link_ksettings->link_modes.advertising);
+ retval &= convert_link_mode_to_legacy_u32(
+ &legacy_settings->lp_advertising,
+ link_ksettings->link_modes.lp_advertising);
+ ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed);
+ legacy_settings->duplex
+ = link_ksettings->base.duplex;
+ legacy_settings->port
+ = link_ksettings->base.port;
+ legacy_settings->phy_address
+ = link_ksettings->base.phy_address;
+ legacy_settings->autoneg
+ = link_ksettings->base.autoneg;
+ legacy_settings->mdio_support
+ = link_ksettings->base.mdio_support;
+ legacy_settings->eth_tp_mdix
+ = link_ksettings->base.eth_tp_mdix;
+ legacy_settings->eth_tp_mdix_ctrl
+ = link_ksettings->base.eth_tp_mdix_ctrl;
+ return retval;
+}
+
+/* number of 32-bit words to store the user's link mode bitmaps */
+#define __ETHTOOL_LINK_MODE_MASK_NU32 \
+ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32)
+
+/* layout of the struct passed from/to userland */
+struct ethtool_link_usettings {
+ struct ethtool_link_settings base;
+ struct {
+ __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32];
+ __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
+ __u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
+ } link_modes;
+};
+
+/* Internal kernel helper to query a device ethtool_link_settings.
+ *
+ * Backward compatibility note: for compatibility with legacy drivers
+ * that implement only the ethtool_cmd API, this has to work with both
+ * drivers implementing get_link_ksettings API and drivers
+ * implementing get_settings API. When drivers implement get_settings
+ * and report ethtool_cmd deprecated fields
+ * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored
+ * because the resulting struct ethtool_link_settings does not report them.
+ */
+int __ethtool_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ int err;
+ struct ethtool_cmd cmd;
+
ASSERT_RTNL();
+ if (dev->ethtool_ops->get_link_ksettings) {
+ memset(link_ksettings, 0, sizeof(*link_ksettings));
+ return dev->ethtool_ops->get_link_ksettings(dev,
+ link_ksettings);
+ }
+
+ /* driver doesn't support %ethtool_link_ksettings API. revert to
+ * legacy %ethtool_cmd API, unless it's not supported either.
+ * TODO: remove when ethtool_ops::get_settings disappears internally
+ */
if (!dev->ethtool_ops->get_settings)
return -EOPNOTSUPP;
- memset(cmd, 0, sizeof(struct ethtool_cmd));
- cmd->cmd = ETHTOOL_GSET;
- return dev->ethtool_ops->get_settings(dev, cmd);
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.cmd = ETHTOOL_GSET;
+ err = dev->ethtool_ops->get_settings(dev, &cmd);
+ if (err < 0)
+ return err;
+
+ /* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt
+ */
+ convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd);
+ return err;
}
-EXPORT_SYMBOL(__ethtool_get_settings);
+EXPORT_SYMBOL(__ethtool_get_link_ksettings);
-static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+/* convert ethtool_link_usettings in user space to a kernel internal
+ * ethtool_link_ksettings. return 0 on success, errno on error.
+ */
+static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to,
+ const void __user *from)
{
- int err;
- struct ethtool_cmd cmd;
+ struct ethtool_link_usettings link_usettings;
+
+ if (copy_from_user(&link_usettings, from, sizeof(link_usettings)))
+ return -EFAULT;
+
+ memcpy(&to->base, &link_usettings.base, sizeof(to->base));
+ bitmap_from_u32array(to->link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_usettings.link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NU32);
+ bitmap_from_u32array(to->link_modes.advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_usettings.link_modes.advertising,
+ __ETHTOOL_LINK_MODE_MASK_NU32);
+ bitmap_from_u32array(to->link_modes.lp_advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_usettings.link_modes.lp_advertising,
+ __ETHTOOL_LINK_MODE_MASK_NU32);
+
+ return 0;
+}
+
+/* convert a kernel internal ethtool_link_ksettings to
+ * ethtool_link_usettings in user space. return 0 on success, errno on
+ * error.
+ */
+static int
+store_link_ksettings_for_user(void __user *to,
+ const struct ethtool_link_ksettings *from)
+{
+ struct ethtool_link_usettings link_usettings;
+
+ memcpy(&link_usettings.base, &from->base, sizeof(link_usettings));
+ bitmap_to_u32array(link_usettings.link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NU32,
+ from->link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_to_u32array(link_usettings.link_modes.advertising,
+ __ETHTOOL_LINK_MODE_MASK_NU32,
+ from->link_modes.advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_to_u32array(link_usettings.link_modes.lp_advertising,
+ __ETHTOOL_LINK_MODE_MASK_NU32,
+ from->link_modes.lp_advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ if (copy_to_user(to, &link_usettings, sizeof(link_usettings)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/* Query device for its ethtool_link_settings.
+ *
+ * Backward compatibility note: this function must fail when driver
+ * does not implement ethtool::get_link_ksettings, even if legacy
+ * ethtool_ops::get_settings is implemented. This tells new versions
+ * of ethtool that they should use the legacy API %ETHTOOL_GSET for
+ * this driver, so that they can correctly access the ethtool_cmd
+ * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
+ * implements ethtool_ops::get_settings anymore.
+ */
+static int ethtool_get_link_ksettings(struct net_device *dev,
+ void __user *useraddr)
+{
+ int err = 0;
+ struct ethtool_link_ksettings link_ksettings;
- err = __ethtool_get_settings(dev, &cmd);
+ ASSERT_RTNL();
+
+ if (!dev->ethtool_ops->get_link_ksettings)
+ return -EOPNOTSUPP;
+
+ /* handle bitmap nbits handshake */
+ if (copy_from_user(&link_ksettings.base, useraddr,
+ sizeof(link_ksettings.base)))
+ return -EFAULT;
+
+ if (__ETHTOOL_LINK_MODE_MASK_NU32
+ != link_ksettings.base.link_mode_masks_nwords) {
+ /* wrong link mode nbits requested */
+ memset(&link_ksettings, 0, sizeof(link_ksettings));
+ /* keep cmd field reset to 0 */
+ /* send back number of words required as negative val */
+ compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX,
+ "need too many bits for link modes!");
+ link_ksettings.base.link_mode_masks_nwords
+ = -((s8)__ETHTOOL_LINK_MODE_MASK_NU32);
+
+ /* copy the base fields back to user, not the link
+ * mode bitmaps
+ */
+ if (copy_to_user(useraddr, &link_ksettings.base,
+ sizeof(link_ksettings.base)))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ /* handshake successful: user/kernel agree on
+ * link_mode_masks_nwords
+ */
+
+ memset(&link_ksettings, 0, sizeof(link_ksettings));
+ err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
if (err < 0)
return err;
+ /* make sure we tell the right values to user */
+ link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
+ link_ksettings.base.link_mode_masks_nwords
+ = __ETHTOOL_LINK_MODE_MASK_NU32;
+
+ return store_link_ksettings_for_user(useraddr, &link_ksettings);
+}
+
+/* Update device ethtool_link_settings.
+ *
+ * Backward compatibility note: this function must fail when driver
+ * does not implement ethtool::set_link_ksettings, even if legacy
+ * ethtool_ops::set_settings is implemented. This tells new versions
+ * of ethtool that they should use the legacy API %ETHTOOL_SSET for
+ * this driver, so that they can correctly update the ethtool_cmd
+ * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
+ * implements ethtool_ops::get_settings anymore.
+ */
+static int ethtool_set_link_ksettings(struct net_device *dev,
+ void __user *useraddr)
+{
+ int err;
+ struct ethtool_link_ksettings link_ksettings;
+
+ ASSERT_RTNL();
+
+ if (!dev->ethtool_ops->set_link_ksettings)
+ return -EOPNOTSUPP;
+
+ /* make sure nbits field has expected value */
+ if (copy_from_user(&link_ksettings.base, useraddr,
+ sizeof(link_ksettings.base)))
+ return -EFAULT;
+
+ if (__ETHTOOL_LINK_MODE_MASK_NU32
+ != link_ksettings.base.link_mode_masks_nwords)
+ return -EINVAL;
+
+ /* copy the whole structure, now that we know it has expected
+ * format
+ */
+ err = load_link_ksettings_from_user(&link_ksettings, useraddr);
+ if (err)
+ return err;
+
+ /* re-check nwords field, just in case */
+ if (__ETHTOOL_LINK_MODE_MASK_NU32
+ != link_ksettings.base.link_mode_masks_nwords)
+ return -EINVAL;
+
+ return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+}
+
+static void
+warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
+{
+ char name[sizeof(current->comm)];
+
+ pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
+ get_task_comm(name, current), details);
+}
+
+/* Query device for its ethtool_cmd settings.
+ *
+ * Backward compatibility note: for compatibility with legacy ethtool,
+ * this has to work with both drivers implementing get_link_ksettings
+ * API and drivers implementing get_settings API. When drivers
+ * implement get_link_ksettings and report higher link mode bits, a
+ * kernel warning is logged once (with name of 1st driver/device) to
+ * recommend user to upgrade ethtool, but the command is successful
+ * (only the lower link mode bits reported back to user).
+ */
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_cmd cmd;
+
+ ASSERT_RTNL();
+
+ if (dev->ethtool_ops->get_link_ksettings) {
+ /* First, use link_ksettings API if it is supported */
+ int err;
+ struct ethtool_link_ksettings link_ksettings;
+
+ memset(&link_ksettings, 0, sizeof(link_ksettings));
+ err = dev->ethtool_ops->get_link_ksettings(dev,
+ &link_ksettings);
+ if (err < 0)
+ return err;
+ if (!convert_link_ksettings_to_legacy_settings(&cmd,
+ &link_ksettings))
+ warn_incomplete_ethtool_legacy_settings_conversion(
+ "link modes are only partially reported");
+
+ /* send a sensible cmd tag back to user */
+ cmd.cmd = ETHTOOL_GSET;
+ } else {
+ /* driver doesn't support %ethtool_link_ksettings
+ * API. revert to legacy %ethtool_cmd API, unless it's
+ * not supported either.
+ */
+ int err;
+
+ if (!dev->ethtool_ops->get_settings)
+ return -EOPNOTSUPP;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.cmd = ETHTOOL_GSET;
+ err = dev->ethtool_ops->get_settings(dev, &cmd);
+ if (err < 0)
+ return err;
+ }
+
if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
return -EFAULT;
+
return 0;
}
+/* Update device link settings with given ethtool_cmd.
+ *
+ * Backward compatibility note: for compatibility with legacy ethtool,
+ * this has to work with both drivers implementing set_link_ksettings
+ * API and drivers implementing set_settings API. When drivers
+ * implement set_link_ksettings and user's request updates deprecated
+ * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
+ * warning is logged once (with name of 1st driver/device) to
+ * recommend user to upgrade ethtool, and the request is rejected.
+ */
static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_cmd cmd;
- if (!dev->ethtool_ops->set_settings)
- return -EOPNOTSUPP;
+ ASSERT_RTNL();
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
+ /* first, try new %ethtool_link_ksettings API. */
+ if (dev->ethtool_ops->set_link_ksettings) {
+ struct ethtool_link_ksettings link_ksettings;
+
+ if (!convert_legacy_settings_to_link_ksettings(&link_ksettings,
+ &cmd))
+ return -EINVAL;
+
+ link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS;
+ link_ksettings.base.link_mode_masks_nwords
+ = __ETHTOOL_LINK_MODE_MASK_NU32;
+ return dev->ethtool_ops->set_link_ksettings(dev,
+ &link_ksettings);
+ }
+
+ /* legacy %ethtool_cmd API */
+
+ /* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings
+ * disappears internally
+ */
+
+ if (!dev->ethtool_ops->set_settings)
+ return -EOPNOTSUPP;
+
return dev->ethtool_ops->set_settings(dev, &cmd);
}
@@ -632,7 +1051,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
return 0;
}
-u8 netdev_rss_key[NETDEV_RSS_KEY_LEN];
+u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
void netdev_rss_key_fill(void *buffer, size_t len)
{
@@ -642,6 +1061,37 @@ void netdev_rss_key_fill(void *buffer, size_t len)
}
EXPORT_SYMBOL(netdev_rss_key_fill);
+static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
+{
+ u32 dev_size, current_max = 0;
+ u32 *indir;
+ int ret;
+
+ if (!dev->ethtool_ops->get_rxfh_indir_size ||
+ !dev->ethtool_ops->get_rxfh)
+ return -EOPNOTSUPP;
+ dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+ if (dev_size == 0)
+ return -EOPNOTSUPP;
+
+ indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
+ if (!indir)
+ return -ENOMEM;
+
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
+ if (ret)
+ goto out;
+
+ while (dev_size--)
+ current_max = max(current_max, indir[dev_size]);
+
+ *max = current_max;
+
+out:
+ kfree(indir);
+ return ret;
+}
+
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
@@ -738,6 +1188,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
}
ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
+ if (ret)
+ goto out;
+
+ /* indicate whether rxfh was set to default */
+ if (user_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
out:
kfree(indir);
@@ -897,6 +1355,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ if (ret)
+ goto out;
+
+ /* indicate whether rxfh was set to default */
+ if (rxfh.indir_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
out:
kfree(rss_config);
@@ -1227,14 +1693,31 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_channels channels;
+ struct ethtool_channels channels, max;
+ u32 max_rx_in_use = 0;
- if (!dev->ethtool_ops->set_channels)
+ if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
return -EOPNOTSUPP;
if (copy_from_user(&channels, useraddr, sizeof(channels)))
return -EFAULT;
+ dev->ethtool_ops->get_channels(dev, &max);
+
+ /* ensure new counts are within the maximums */
+ if ((channels.rx_count > max.max_rx) ||
+ (channels.tx_count > max.max_tx) ||
+ (channels.combined_count > max.max_combined) ||
+ (channels.other_count > max.max_other))
+ return -EINVAL;
+
+ /* ensure the new Rx count fits within the configured Rx flow
+ * indirection table settings */
+ if (netif_is_rxfh_configured(dev) &&
+ !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) &&
+ (channels.combined_count + channels.rx_count) <= max_rx_in_use)
+ return -EINVAL;
+
return dev->ethtool_ops->set_channels(dev, &channels);
}
@@ -1823,13 +2306,121 @@ out:
return ret;
}
+static int ethtool_get_per_queue_coalesce(struct net_device *dev,
+ void __user *useraddr,
+ struct ethtool_per_queue_op *per_queue_opt)
+{
+ u32 bit;
+ int ret;
+ DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
+
+ if (!dev->ethtool_ops->get_per_queue_coalesce)
+ return -EOPNOTSUPP;
+
+ useraddr += sizeof(*per_queue_opt);
+
+ bitmap_from_u32array(queue_mask,
+ MAX_NUM_QUEUE,
+ per_queue_opt->queue_mask,
+ DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
+
+ for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
+ struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
+
+ ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce);
+ if (ret != 0)
+ return ret;
+ if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
+ return -EFAULT;
+ useraddr += sizeof(coalesce);
+ }
+
+ return 0;
+}
+
+static int ethtool_set_per_queue_coalesce(struct net_device *dev,
+ void __user *useraddr,
+ struct ethtool_per_queue_op *per_queue_opt)
+{
+ u32 bit;
+ int i, ret = 0;
+ int n_queue;
+ struct ethtool_coalesce *backup = NULL, *tmp = NULL;
+ DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
+
+ if ((!dev->ethtool_ops->set_per_queue_coalesce) ||
+ (!dev->ethtool_ops->get_per_queue_coalesce))
+ return -EOPNOTSUPP;
+
+ useraddr += sizeof(*per_queue_opt);
+
+ bitmap_from_u32array(queue_mask,
+ MAX_NUM_QUEUE,
+ per_queue_opt->queue_mask,
+ DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
+ n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE);
+ tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL);
+ if (!backup)
+ return -ENOMEM;
+
+ for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
+ struct ethtool_coalesce coalesce;
+
+ ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp);
+ if (ret != 0)
+ goto roll_back;
+
+ tmp++;
+
+ if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) {
+ ret = -EFAULT;
+ goto roll_back;
+ }
+
+ ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce);
+ if (ret != 0)
+ goto roll_back;
+
+ useraddr += sizeof(coalesce);
+ }
+
+roll_back:
+ if (ret != 0) {
+ tmp = backup;
+ for_each_set_bit(i, queue_mask, bit) {
+ dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp);
+ tmp++;
+ }
+ }
+ kfree(backup);
+
+ return ret;
+}
+
+static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_per_queue_op per_queue_opt;
+
+ if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
+ return -EFAULT;
+
+ switch (per_queue_opt.sub_command) {
+ case ETHTOOL_GCOALESCE:
+ return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
+ case ETHTOOL_SCOALESCE:
+ return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt);
+ default:
+ return -EOPNOTSUPP;
+ };
+}
+
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
{
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
void __user *useraddr = ifr->ifr_data;
- u32 ethcmd;
+ u32 ethcmd, sub_cmd;
int rc;
netdev_features_t old_features;
@@ -1839,8 +2430,14 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
+ if (ethcmd == ETHTOOL_PERQUEUE) {
+ if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)))
+ return -EFAULT;
+ } else {
+ sub_cmd = ethcmd;
+ }
/* Allow some commands to be done by anyone */
- switch (ethcmd) {
+ switch (sub_cmd) {
case ETHTOOL_GSET:
case ETHTOOL_GDRVINFO:
case ETHTOOL_GMSGLVL:
@@ -2070,6 +2667,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GPHYSTATS:
rc = ethtool_get_phy_stats(dev, useraddr);
break;
+ case ETHTOOL_PERQUEUE:
+ rc = ethtool_set_per_queue(dev, useraddr);
+ break;
+ case ETHTOOL_GLINKSETTINGS:
+ rc = ethtool_get_link_ksettings(dev, useraddr);
+ break;
+ case ETHTOOL_SLINKSETTINGS:
+ rc = ethtool_set_link_ksettings(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 94d26201080d..a3aba15a8025 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -530,12 +530,14 @@ do_pass:
*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
break;
- /* RET_K, RET_A are remaped into 2 insns. */
+ /* RET_K is remaped into 2 insns. RET_A case doesn't need an
+ * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
+ */
case BPF_RET | BPF_A:
case BPF_RET | BPF_K:
- *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
- BPF_K : BPF_X, BPF_REG_0,
- BPF_REG_A, fp->k);
+ if (BPF_RVAL(fp->code) == BPF_K)
+ *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
+ 0, fp->k);
*insn = BPF_EXIT_INSN();
break;
@@ -1181,7 +1183,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
return -ENOMEM;
- if (sk_unhashed(sk)) {
+ if (sk_unhashed(sk) && sk->sk_reuseport) {
err = reuseport_alloc(sk);
if (err)
return err;
@@ -1333,15 +1335,22 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
-#define BPF_LDST_LEN 16U
+struct bpf_scratchpad {
+ union {
+ __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
+ u8 buff[MAX_BPF_STACK];
+ };
+};
+
+static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
{
+ struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
struct sk_buff *skb = (struct sk_buff *) (long) r1;
int offset = (int) r2;
void *from = (void *) (long) r3;
unsigned int len = (unsigned int) r4;
- char buf[BPF_LDST_LEN];
void *ptr;
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM)))
@@ -1355,14 +1364,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
*
* so check for invalid 'offset' and too large 'len'
*/
- if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
+ if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
return -EFAULT;
-
- if (unlikely(skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + len)))
+ if (unlikely(skb_try_make_writable(skb, offset + len)))
return -EFAULT;
- ptr = skb_header_pointer(skb, offset, len, buf);
+ ptr = skb_header_pointer(skb, offset, len, sp->buff);
if (unlikely(!ptr))
return -EFAULT;
@@ -1371,7 +1378,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
memcpy(ptr, from, len);
- if (ptr == buf)
+ if (ptr == sp->buff)
/* skb_store_bits cannot return -EFAULT here */
skb_store_bits(skb, offset, ptr, len);
@@ -1400,7 +1407,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
unsigned int len = (unsigned int) r4;
void *ptr;
- if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN))
+ if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -1432,9 +1439,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EINVAL;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
-
- if (unlikely(skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(sum))))
+ if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1474,23 +1479,31 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
+ bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
int offset = (int) r2;
__sum16 sum, *ptr;
- if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
+ if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
+ BPF_F_HDR_FIELD_MASK)))
return -EINVAL;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
-
- if (unlikely(skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(sum))))
+ if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
if (unlikely(!ptr))
return -EFAULT;
+ if (is_mmzero && !*ptr)
+ return 0;
switch (flags & BPF_F_HDR_FIELD_MASK) {
+ case 0:
+ if (unlikely(from != 0))
+ return -EINVAL;
+
+ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
+ break;
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
break;
@@ -1501,6 +1514,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EINVAL;
}
+ if (is_mmzero && !*ptr)
+ *ptr = CSUM_MANGLED_0;
if (ptr == &sum)
/* skb_store_bits guaranteed to not return -EFAULT here */
skb_store_bits(skb, offset, ptr, sizeof(sum));
@@ -1519,6 +1534,45 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
+static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
+{
+ struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
+ u64 diff_size = from_size + to_size;
+ __be32 *from = (__be32 *) (long) r1;
+ __be32 *to = (__be32 *) (long) r3;
+ int i, j = 0;
+
+ /* This is quite flexible, some examples:
+ *
+ * from_size == 0, to_size > 0, seed := csum --> pushing data
+ * from_size > 0, to_size == 0, seed := csum --> pulling data
+ * from_size > 0, to_size > 0, seed := 0 --> diffing data
+ *
+ * Even for diffing, from_size and to_size don't need to be equal.
+ */
+ if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
+ diff_size > sizeof(sp->diff)))
+ return -EINVAL;
+
+ for (i = 0; i < from_size / sizeof(__be32); i++, j++)
+ sp->diff[j] = ~from[i];
+ for (i = 0; i < to_size / sizeof(__be32); i++, j++)
+ sp->diff[j] = to[i];
+
+ return csum_partial(sp->diff, diff_size, seed);
+}
+
+const struct bpf_func_proto bpf_csum_diff_proto = {
+ .func = bpf_csum_diff,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_STACK,
+ .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_STACK,
+ .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO,
+ .arg5_type = ARG_ANYTHING,
+};
+
static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
@@ -1682,6 +1736,13 @@ bool bpf_helper_changes_skb_data(void *func)
return true;
if (func == bpf_skb_vlan_pop)
return true;
+ if (func == bpf_skb_store_bytes)
+ return true;
+ if (func == bpf_l3_csum_replace)
+ return true;
+ if (func == bpf_l4_csum_replace)
+ return true;
+
return false;
}
@@ -1849,6 +1910,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_store_bytes_proto;
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_csum_diff:
+ return &bpf_csum_diff_proto;
case BPF_FUNC_l3_csum_replace:
return &bpf_l3_csum_replace_proto;
case BPF_FUNC_l4_csum_replace:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d79699c9d1b9..7c7b8739b8b8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -178,15 +178,16 @@ ip:
ip_proto = iph->protocol;
- if (!dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS))
- break;
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ target_container);
- key_addrs = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container);
- memcpy(&key_addrs->v4addrs, &iph->saddr,
- sizeof(key_addrs->v4addrs));
- key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ memcpy(&key_addrs->v4addrs, &iph->saddr,
+ sizeof(key_addrs->v4addrs));
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ }
if (ip_is_fragment(iph)) {
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
@@ -208,7 +209,6 @@ ip:
case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
- __be32 flow_label;
ipv6:
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
@@ -220,18 +220,21 @@ ipv6:
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
- struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
-
- key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_IPV6_ADDRS,
- target_container);
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ target_container);
- memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
+ memcpy(&key_addrs->v6addrs, &iph->saddr,
+ sizeof(key_addrs->v6addrs));
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
- flow_label = ip6_flowlabel(iph);
- if (flow_label) {
+ if ((dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
+ (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
+ ip6_flowlabel(iph)) {
+ __be32 flow_label = ip6_flowlabel(iph);
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
key_tags = skb_flow_dissector_target(flow_dissector,
@@ -336,8 +339,11 @@ mpls:
}
case htons(ETH_P_FCOE):
- key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
- /* fall through */
+ if ((hlen - nhoff) < FCOE_HEADER_LEN)
+ goto out_bad;
+
+ nhoff += FCOE_HEADER_LEN;
+ goto out_good;
default:
goto out_bad;
}
@@ -396,6 +402,13 @@ ip_proto_again:
goto out_bad;
proto = eth->h_proto;
nhoff += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ hlen = nhoff;
}
key_control->flags |= FLOW_DIS_ENCAPSULATION;
@@ -437,13 +450,12 @@ ip_proto_again:
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
nhoff += sizeof(_fh);
+ ip_proto = fh->nexthdr;
if (!(fh->frag_off & htons(IP6_OFFSET))) {
key_control->flags |= FLOW_DIS_FIRST_FRAG;
- if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
- ip_proto = fh->nexthdr;
+ if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
goto ip_proto_again;
- }
}
goto out_good;
}
@@ -730,6 +742,11 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
{
u32 poff = keys->control.thoff;
+ /* skip L4 headers for fragments after the first */
+ if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
+ !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
+ return poff;
+
switch (keys->basic.ip_proto) {
case IPPROTO_TCP: {
/* access doff as u8 to avoid unaligned access */
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 299cfc24d888..669ecc9f884e 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -27,6 +27,31 @@
#include <net/rtnetlink.h>
#include <net/ip6_fib.h>
+#ifdef CONFIG_MODULES
+
+static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
+{
+ /* Only lwt encaps implemented without using an interface for
+ * the encap need to return a string here.
+ */
+ switch (encap_type) {
+ case LWTUNNEL_ENCAP_MPLS:
+ return "MPLS";
+ case LWTUNNEL_ENCAP_ILA:
+ return "ILA";
+ case LWTUNNEL_ENCAP_IP6:
+ case LWTUNNEL_ENCAP_IP:
+ case LWTUNNEL_ENCAP_NONE:
+ case __LWTUNNEL_ENCAP_MAX:
+ /* should not have got here */
+ WARN_ON(1);
+ break;
+ }
+ return NULL;
+}
+
+#endif /* CONFIG_MODULES */
+
struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
{
struct lwtunnel_state *lws;
@@ -85,6 +110,18 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
+#ifdef CONFIG_MODULES
+ if (!ops) {
+ const char *encap_type_str = lwtunnel_encap_str(encap_type);
+
+ if (encap_type_str) {
+ rcu_read_unlock();
+ request_module("rtnl-lwt-%s", encap_type_str);
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[encap_type]);
+ }
+ }
+#endif
if (likely(ops && ops->build_state))
ret = ops->build_state(dev, encap, family, cfg, lws);
rcu_read_unlock();
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b6c8a6629b39..2b3f76fe65f4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -29,7 +29,6 @@
#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
-static const char fmt_long_hex[] = "%#lx\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
@@ -199,9 +198,10 @@ static ssize_t speed_show(struct device *dev,
return restart_syscall();
if (netif_running(netdev)) {
- struct ethtool_cmd cmd;
- if (!__ethtool_get_settings(netdev, &cmd))
- ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
+ struct ethtool_link_ksettings cmd;
+
+ if (!__ethtool_get_link_ksettings(netdev, &cmd))
+ ret = sprintf(buf, fmt_dec, cmd.base.speed);
}
rtnl_unlock();
return ret;
@@ -218,10 +218,12 @@ static ssize_t duplex_show(struct device *dev,
return restart_syscall();
if (netif_running(netdev)) {
- struct ethtool_cmd cmd;
- if (!__ethtool_get_settings(netdev, &cmd)) {
+ struct ethtool_link_ksettings cmd;
+
+ if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
const char *duplex;
- switch (cmd.duplex) {
+
+ switch (cmd.base.duplex) {
case DUPLEX_HALF:
duplex = "half";
break;
@@ -574,6 +576,7 @@ NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
+NETSTAT_ENTRY(rx_nohandler);
static struct attribute *netstat_attrs[] = {
&dev_attr_rx_packets.attr,
@@ -599,6 +602,7 @@ static struct attribute *netstat_attrs[] = {
&dev_attr_tx_window_errors.attr,
&dev_attr_rx_compressed.attr,
&dev_attr_tx_compressed.attr,
+ &dev_attr_rx_nohandler.attr,
NULL
};
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0260c84ed83c..11fce17274f6 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -9,7 +9,6 @@
* Authors: Thomas Graf <tgraf@suug.ch>
*/
-#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cgroup.h>
#include <linux/fdtable.h>
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index f1efbc39ef6b..2ec86fc552df 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -11,7 +11,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/string.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d735e854f916..62737f437c8e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -804,6 +804,8 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
a->rx_compressed = b->rx_compressed;
a->tx_compressed = b->tx_compressed;
+
+ a->rx_nohandler = b->rx_nohandler;
}
static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
@@ -1412,6 +1414,58 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
[IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
};
+static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
+{
+ const struct rtnl_link_ops *ops = NULL;
+ struct nlattr *linfo[IFLA_INFO_MAX + 1];
+
+ if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0)
+ return NULL;
+
+ if (linfo[IFLA_INFO_KIND]) {
+ char kind[MODULE_NAME_LEN];
+
+ nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
+ ops = rtnl_link_ops_get(kind);
+ }
+
+ return ops;
+}
+
+static bool link_master_filtered(struct net_device *dev, int master_idx)
+{
+ struct net_device *master;
+
+ if (!master_idx)
+ return false;
+
+ master = netdev_master_upper_dev_get(dev);
+ if (!master || master->ifindex != master_idx)
+ return true;
+
+ return false;
+}
+
+static bool link_kind_filtered(const struct net_device *dev,
+ const struct rtnl_link_ops *kind_ops)
+{
+ if (kind_ops && dev->rtnl_link_ops != kind_ops)
+ return true;
+
+ return false;
+}
+
+static bool link_dump_filtered(struct net_device *dev,
+ int master_idx,
+ const struct rtnl_link_ops *kind_ops)
+{
+ if (link_master_filtered(dev, master_idx) ||
+ link_kind_filtered(dev, kind_ops))
+ return true;
+
+ return false;
+}
+
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -1421,6 +1475,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_head *head;
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
+ const struct rtnl_link_ops *kind_ops = NULL;
+ unsigned int flags = NLM_F_MULTI;
+ int master_idx = 0;
int err;
int hdrlen;
@@ -1443,18 +1500,29 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+
+ if (tb[IFLA_MASTER])
+ master_idx = nla_get_u32(tb[IFLA_MASTER]);
+
+ if (tb[IFLA_LINKINFO])
+ kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]);
+
+ if (master_idx || kind_ops)
+ flags |= NLM_F_DUMP_FILTERED;
}
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
+ if (link_dump_filtered(dev, master_idx, kind_ops))
+ continue;
if (idx < s_idx)
goto cont;
err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
- NLM_F_MULTI,
+ flags,
ext_filter_mask);
/* If we ran out of room on the first message,
* we're in trouble
diff --git a/net/core/scm.c b/net/core/scm.c
index 14596fb37172..2696aefdc148 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
*fplp = fpl;
fpl->count = 0;
fpl->max = SCM_MAX_FD;
+ fpl->user = NULL;
}
fpp = &fpl->fp[fpl->count];
@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
*fpp++ = file;
fpl->count++;
}
+
+ if (!fpl->user)
+ fpl->user = get_uid(current_user());
+
return num;
}
@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm)
scm->fp = NULL;
for (i=fpl->count-1; i>=0; i--)
fput(fpl->fp[i]);
+ free_uid(fpl->user);
kfree(fpl);
}
}
@@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
new_fpl->max = new_fpl->count;
+ new_fpl->user = get_uid(fpl->user);
}
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b2df375ec9c2..488566b09c6d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,6 +79,8 @@
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
+EXPORT_SYMBOL(sysctl_max_skb_frags);
/**
* skb_panic - private function for out-of-line support
@@ -347,8 +349,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
}
EXPORT_SYMBOL(build_skb);
+#define NAPI_SKB_CACHE_SIZE 64
+
+struct napi_alloc_cache {
+ struct page_frag_cache page;
+ size_t skb_count;
+ void *skb_cache[NAPI_SKB_CACHE_SIZE];
+};
+
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
+static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
@@ -378,9 +388,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
- struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return __alloc_page_frag(nc, fragsz, gfp_mask);
+ return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
}
void *napi_alloc_frag(unsigned int fragsz)
@@ -474,7 +484,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
gfp_t gfp_mask)
{
- struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
void *data;
@@ -494,7 +504,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- data = __alloc_page_frag(nc, len, gfp_mask);
+ data = __alloc_page_frag(&nc->page, len, gfp_mask);
if (unlikely(!data))
return NULL;
@@ -505,7 +515,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
}
/* use OR instead of assignment to avoid clearing of bits in mask */
- if (nc->pfmemalloc)
+ if (nc->page.pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -747,6 +757,73 @@ void consume_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(consume_skb);
+void __kfree_skb_flush(void)
+{
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+ /* flush skb_cache if containing objects */
+ if (nc->skb_count) {
+ kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
+ nc->skb_cache);
+ nc->skb_count = 0;
+ }
+}
+
+static inline void _kfree_skb_defer(struct sk_buff *skb)
+{
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+ /* drop skb->head and call any destructors for packet */
+ skb_release_all(skb);
+
+ /* record skb to CPU local list */
+ nc->skb_cache[nc->skb_count++] = skb;
+
+#ifdef CONFIG_SLUB
+ /* SLUB writes into objects when freeing */
+ prefetchw(skb);
+#endif
+
+ /* flush skb_cache if it is filled */
+ if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
+ kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
+ nc->skb_cache);
+ nc->skb_count = 0;
+ }
+}
+void __kfree_skb_defer(struct sk_buff *skb)
+{
+ _kfree_skb_defer(skb);
+}
+
+void napi_consume_skb(struct sk_buff *skb, int budget)
+{
+ if (unlikely(!skb))
+ return;
+
+ /* if budget is 0 assume netpoll w/ IRQs disabled */
+ if (unlikely(!budget)) {
+ dev_consume_skb_irq(skb);
+ return;
+ }
+
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
+ /* if reaching here SKB is ready to free */
+ trace_consume_skb(skb);
+
+ /* if SKB is a clone, don't handle this case */
+ if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+ __kfree_skb(skb);
+ return;
+ }
+
+ _kfree_skb_defer(skb);
+}
+EXPORT_SYMBOL(napi_consume_skb);
+
/* Make sure a field is enclosed inside headers_start/headers_end section */
#define CHECK_SKB_FIELD(field) \
BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
@@ -3004,8 +3081,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
- csum = !head_skb->encap_hdr_csum &&
- !!can_checksum_protocol(features, proto);
+ csum = !!can_checksum_protocol(features, proto);
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
@@ -3098,13 +3174,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (nskb->len == len + doffset)
goto perform_csum_check;
- if (!sg && !nskb->remcsum_offload) {
- nskb->ip_summed = CHECKSUM_NONE;
- nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
- skb_put(nskb, len),
- len, 0);
+ if (!sg) {
+ if (!nskb->remcsum_offload)
+ nskb->ip_summed = CHECKSUM_NONE;
+ SKB_GSO_CB(nskb)->csum =
+ skb_copy_and_csum_bits(head_skb, offset,
+ skb_put(nskb, len),
+ len, 0);
SKB_GSO_CB(nskb)->csum_start =
- skb_headroom(nskb) + doffset;
+ skb_headroom(nskb) + doffset;
continue;
}
@@ -3170,12 +3248,19 @@ skip_fraglist:
nskb->truesize += nskb->data_len;
perform_csum_check:
- if (!csum && !nskb->remcsum_offload) {
- nskb->csum = skb_checksum(nskb, doffset,
- nskb->len - doffset, 0);
- nskb->ip_summed = CHECKSUM_NONE;
+ if (!csum) {
+ if (skb_has_shared_frag(nskb)) {
+ err = __skb_linearize(nskb);
+ if (err)
+ goto err;
+ }
+ if (!nskb->remcsum_offload)
+ nskb->ip_summed = CHECKSUM_NONE;
+ SKB_GSO_CB(nskb)->csum =
+ skb_checksum(nskb, doffset,
+ nskb->len - doffset, 0);
SKB_GSO_CB(nskb)->csum_start =
- skb_headroom(nskb) + doffset;
+ skb_headroom(nskb) + doffset;
}
} while ((offset += len) < head_skb->len);
@@ -4413,9 +4498,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
skb->mac_len += VLAN_HLEN;
__skb_pull(skb, offset);
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(skb->data
- + (2 * ETH_ALEN), VLAN_HLEN, 0));
+ skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
}
__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 51270238e269..4493ff820c2c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -195,44 +195,6 @@ bool sk_net_capable(const struct sock *sk, int cap)
}
EXPORT_SYMBOL(sk_net_capable);
-
-#ifdef CONFIG_MEMCG_KMEM
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
- struct proto *proto;
- int ret = 0;
-
- mutex_lock(&proto_list_mutex);
- list_for_each_entry(proto, &proto_list, node) {
- if (proto->init_cgroup) {
- ret = proto->init_cgroup(memcg, ss);
- if (ret)
- goto out;
- }
- }
-
- mutex_unlock(&proto_list_mutex);
- return ret;
-out:
- list_for_each_entry_continue_reverse(proto, &proto_list, node)
- if (proto->destroy_cgroup)
- proto->destroy_cgroup(memcg);
- mutex_unlock(&proto_list_mutex);
- return ret;
-}
-
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
- struct proto *proto;
-
- mutex_lock(&proto_list_mutex);
- list_for_each_entry_reverse(proto, &proto_list, node)
- if (proto->destroy_cgroup)
- proto->destroy_cgroup(memcg);
- mutex_unlock(&proto_list_mutex);
-}
-#endif
-
/*
* Each address family might have different locking rules, so we have
* one slock key per address family:
@@ -240,11 +202,6 @@ void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
-#if defined(CONFIG_MEMCG_KMEM)
-struct static_key memcg_socket_limit_enabled;
-EXPORT_SYMBOL(memcg_socket_limit_enabled);
-#endif
-
/*
* Make lock validator output more readable. (we pre-construct these
* strings build-time, so that runtime initialization of socket
@@ -1030,6 +987,10 @@ set_rcvbuf:
sk->sk_incoming_cpu = val;
break;
+ case SO_CNX_ADVICE:
+ if (val == 1)
+ dst_negative_advice(sk);
+ break;
default:
ret = -ENOPROTOOPT;
break;
@@ -1507,12 +1468,6 @@ void sk_free(struct sock *sk)
}
EXPORT_SYMBOL(sk_free);
-static void sk_update_clone(const struct sock *sk, struct sock *newsk)
-{
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
- sock_update_memcg(newsk);
-}
-
/**
* sk_clone_lock - clone a socket, and lock its clone
* @sk: the socket to clone
@@ -1580,6 +1535,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk = NULL;
goto out;
}
+ RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
newsk->sk_err = 0;
newsk->sk_priority = 0;
@@ -1607,7 +1563,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sk_set_socket(newsk, NULL);
newsk->sk_wq = NULL;
- sk_update_clone(sk, newsk);
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ sock_update_memcg(newsk);
if (newsk->sk_prot->sockets_allocated)
sk_sockets_allocated_inc(newsk);
@@ -2089,27 +2046,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
long allocated;
- int parent_status = UNDER_LIMIT;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = sk_memory_allocated_add(sk, amt, &parent_status);
+ allocated = sk_memory_allocated_add(sk, amt);
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
+ goto suppress_allocation;
/* Under limit. */
- if (parent_status == UNDER_LIMIT &&
- allocated <= sk_prot_mem_limits(sk, 0)) {
+ if (allocated <= sk_prot_mem_limits(sk, 0)) {
sk_leave_memory_pressure(sk);
return 1;
}
- /* Under pressure. (we or our parents) */
- if ((parent_status > SOFT_LIMIT) ||
- allocated > sk_prot_mem_limits(sk, 1))
+ /* Under pressure. */
+ if (allocated > sk_prot_mem_limits(sk, 1))
sk_enter_memory_pressure(sk);
- /* Over hard limit (we or our parents) */
- if ((parent_status == OVER_LIMIT) ||
- (allocated > sk_prot_mem_limits(sk, 2)))
+ /* Over hard limit. */
+ if (allocated > sk_prot_mem_limits(sk, 2))
goto suppress_allocation;
/* guarantee minimum buffer size under pressure */
@@ -2158,6 +2115,9 @@ suppress_allocation:
sk_memory_allocated_sub(sk, amt);
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
+
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -2173,6 +2133,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
sk_memory_allocated_sub(sk, amount);
sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
+
if (sk_under_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 1df98c557440..e92b759d906c 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -93,10 +93,17 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
* @sk2: Socket belonging to the existing reuseport group.
* May return ENOMEM and not add socket to group under memory pressure.
*/
-int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
{
struct sock_reuseport *reuse;
+ if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+ int err = reuseport_alloc(sk2);
+
+ if (err)
+ return err;
+ }
+
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock)),
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 95b6139d710c..a6beb7b6ae55 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -26,6 +26,7 @@ static int zero = 0;
static int one = 1;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
+static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "max_skb_frags",
+ .data = &sysctl_max_skb_frags,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &max_skb_frags,
+ },
{ }
};
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5684e14932bd..b5672e5fe649 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -802,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
}
lookup:
- sk = __inet_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
@@ -824,26 +824,26 @@ lookup:
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (likely(sk->sk_state == DCCP_LISTEN)) {
- nsk = dccp_check_req(sk, skb, req);
- } else {
+ if (unlikely(sk->sk_state != DCCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
dccp_v4_ctl_send_reset(sk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c6d0508e63a..4663a01d5039 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -668,7 +668,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
lookup:
- sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport,
inet6_iif(skb));
if (!sk) {
@@ -691,26 +691,26 @@ lookup:
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (likely(sk->sk_state == DCCP_LISTEN)) {
- nsk = dccp_check_req(sk, skb, req);
- } else {
+ if (unlikely(sk->sk_state != DCCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
dccp_v6_ctl_send_reset(sk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@ -993,7 +993,7 @@ static struct proto dccp_v6_prot = {
.sendmsg = dccp_sendmsg,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v6_do_rcv,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.accept = inet_csk_accept,
.get_port = inet_csk_get_port,
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 40b9ca72aae3..27bf03d11670 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -201,47 +201,6 @@ out:
return 0;
}
-static int dsa_bridge_check_vlan_range(struct dsa_switch *ds,
- const struct net_device *bridge,
- u16 vid_begin, u16 vid_end)
-{
- struct dsa_slave_priv *p;
- struct net_device *dev, *vlan_br;
- DECLARE_BITMAP(members, DSA_MAX_PORTS);
- DECLARE_BITMAP(untagged, DSA_MAX_PORTS);
- u16 vid;
- int member, err;
-
- if (!ds->drv->vlan_getnext || !vid_begin)
- return -EOPNOTSUPP;
-
- vid = vid_begin - 1;
-
- do {
- err = ds->drv->vlan_getnext(ds, &vid, members, untagged);
- if (err)
- break;
-
- if (vid > vid_end)
- break;
-
- member = find_first_bit(members, DSA_MAX_PORTS);
- if (member == DSA_MAX_PORTS)
- continue;
-
- dev = ds->ports[member];
- p = netdev_priv(dev);
- vlan_br = p->bridge_dev;
- if (vlan_br == bridge)
- continue;
-
- netdev_dbg(vlan_br, "hardware VLAN %d already in use\n", vid);
- return -EOPNOTSUPP;
- } while (vid < vid_end);
-
- return err == -ENOENT ? 0 : err;
-}
-
static int dsa_slave_port_vlan_add(struct net_device *dev,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans)
@@ -254,15 +213,6 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
return -EOPNOTSUPP;
- /* If the requested port doesn't belong to the same bridge as
- * the VLAN members, fallback to software VLAN (hopefully).
- */
- err = dsa_bridge_check_vlan_range(ds, p->bridge_dev,
- vlan->vid_begin,
- vlan->vid_end);
- if (err)
- return err;
-
err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans);
if (err)
return err;
@@ -293,41 +243,11 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- DECLARE_BITMAP(members, DSA_MAX_PORTS);
- DECLARE_BITMAP(untagged, DSA_MAX_PORTS);
- u16 pvid, vid = 0;
- int err;
-
- if (!ds->drv->vlan_getnext || !ds->drv->port_pvid_get)
- return -EOPNOTSUPP;
-
- err = ds->drv->port_pvid_get(ds, p->port, &pvid);
- if (err)
- return err;
-
- for (;;) {
- err = ds->drv->vlan_getnext(ds, &vid, members, untagged);
- if (err)
- break;
-
- if (!test_bit(p->port, members))
- continue;
-
- memset(vlan, 0, sizeof(*vlan));
- vlan->vid_begin = vlan->vid_end = vid;
- if (vid == pvid)
- vlan->flags |= BRIDGE_VLAN_INFO_PVID;
+ if (ds->drv->port_vlan_dump)
+ return ds->drv->port_vlan_dump(ds, p->port, vlan, cb);
- if (test_bit(p->port, untagged))
- vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
-
- err = cb(&vlan->obj);
- if (err)
- break;
- }
-
- return err == -ENOENT ? 0 : err;
+ return -EOPNOTSUPP;
}
static int dsa_slave_port_fdb_add(struct net_device *dev,
@@ -385,31 +305,6 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EOPNOTSUPP;
}
-/* Return a bitmask of all ports being currently bridged within a given bridge
- * device. Note that on leave, the mask will still return the bitmask of ports
- * currently bridged, prior to port removal, and this is exactly what we want.
- */
-static u32 dsa_slave_br_port_mask(struct dsa_switch *ds,
- struct net_device *bridge)
-{
- struct dsa_slave_priv *p;
- unsigned int port;
- u32 mask = 0;
-
- for (port = 0; port < DSA_MAX_PORTS; port++) {
- if (!dsa_is_port_initialized(ds, port))
- continue;
-
- p = netdev_priv(ds->ports[port]);
-
- if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT &&
- p->bridge_dev == bridge)
- mask |= 1 << port;
- }
-
- return mask;
-}
-
static int dsa_slave_stp_update(struct net_device *dev, u8 state)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -422,6 +317,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state)
return ret;
}
+static int dsa_slave_vlan_filtering(struct net_device *dev,
+ const struct switchdev_attr *attr,
+ struct switchdev_trans *trans)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ if (ds->drv->port_vlan_filtering)
+ return ds->drv->port_vlan_filtering(ds, p->port,
+ attr->u.vlan_filtering);
+
+ return 0;
+}
+
static int dsa_slave_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
@@ -438,6 +351,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
ret = ds->drv->port_stp_update(ds, p->port,
attr->u.stp_state);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+ ret = dsa_slave_vlan_filtering(dev, attr, trans);
+ break;
default:
ret = -EOPNOTSUPP;
break;
@@ -533,8 +449,7 @@ static int dsa_slave_bridge_port_join(struct net_device *dev,
p->bridge_dev = br;
if (ds->drv->port_join_bridge)
- ret = ds->drv->port_join_bridge(ds, p->port,
- dsa_slave_br_port_mask(ds, br));
+ ret = ds->drv->port_join_bridge(ds, p->port, br);
return ret;
}
@@ -547,8 +462,7 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev)
if (ds->drv->port_leave_bridge)
- ret = ds->drv->port_leave_bridge(ds, p->port,
- dsa_slave_br_port_mask(ds, p->bridge_dev));
+ ret = ds->drv->port_leave_bridge(ds, p->port);
p->bridge_dev = NULL;
@@ -1194,7 +1108,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
if (ret) {
netdev_err(master, "error %d registering interface %s\n",
ret, slave_dev->name);
- phy_disconnect(p->phy);
ds->ports[port] = NULL;
free_netdev(slave_dev);
return ret;
@@ -1205,6 +1118,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
ret = dsa_slave_phy_setup(p, slave_dev);
if (ret) {
netdev_err(master, "error %d setting up slave phy\n", ret);
+ unregister_netdev(slave_dev);
free_netdev(slave_dev);
return ret;
}
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 103871784e50..66dff5e3d772 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -125,6 +125,7 @@ EXPORT_SYMBOL(eth_header);
*/
u32 eth_get_headlen(void *data, unsigned int len)
{
+ const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
const struct ethhdr *eth = (const struct ethhdr *)data;
struct flow_keys keys;
@@ -134,7 +135,7 @@ u32 eth_get_headlen(void *data, unsigned int len)
/* parse any remaining L2/L3 headers, check for L4 */
if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
- sizeof(*eth), len, 0))
+ sizeof(*eth), len, flags))
return max_t(u32, keys.control.thoff, sizeof(*eth));
/* parse for any L4 headers */
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a548be247e15..e0bd013a1e5e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -182,12 +182,14 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
static HLIST_HEAD(raw_head);
static DEFINE_RWLOCK(raw_lock);
-static void raw_hash(struct sock *sk)
+static int raw_hash(struct sock *sk)
{
write_lock_bh(&raw_lock);
sk_add_node(sk, &raw_head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&raw_lock);
+
+ return 0;
}
static void raw_unhash(struct sock *sk)
@@ -462,12 +464,14 @@ static inline struct dgram_sock *dgram_sk(const struct sock *sk)
return container_of(sk, struct dgram_sock, sk);
}
-static void dgram_hash(struct sock *sk)
+static int dgram_hash(struct sock *sk)
{
write_lock_bh(&dgram_lock);
sk_add_node(sk, &dgram_head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&dgram_lock);
+
+ return 0;
}
static void dgram_unhash(struct sock *sk)
@@ -1026,8 +1030,13 @@ static int ieee802154_create(struct net *net, struct socket *sock,
/* Checksums on by default */
sock_set_flag(sk, SOCK_ZAPPED);
- if (sk->sk_prot->hash)
- sk->sk_prot->hash(sk);
+ if (sk->sk_prot->hash) {
+ rc = sk->sk_prot->hash(sk);
+ if (rc) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
if (sk->sk_prot->init) {
rc = sk->sk_prot->init(sk);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c22920525e5d..238225b0c970 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -186,6 +186,7 @@ config NET_IPGRE_DEMUX
config NET_IP_TUNNEL
tristate
+ select DST_CACHE
default n
config NET_IPGRE
@@ -353,6 +354,7 @@ config INET_ESP
select CRYPTO_CBC
select CRYPTO_SHA1
select CRYPTO_DES
+ select CRYPTO_ECHAINIV
---help---
Support for IPsec ESP.
@@ -404,14 +406,6 @@ config INET_XFRM_MODE_BEET
If unsure, say Y.
-config INET_LRO
- tristate "Large Receive Offload (ipv4/tcp)"
- default y
- ---help---
- Support for Large Receive Offload (ipv4/tcp).
-
- If unsure, say Y.
-
config INET_DIAG
tristate "INET: socket monitoring interface"
default y
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c29809f765dc..bfa133691cde 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -32,7 +32,6 @@ obj-$(CONFIG_INET_ESP) += esp4.o
obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
-obj-$(CONFIG_INET_LRO) += inet_lro.o
obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
@@ -56,7 +55,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
-obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5c5db6636704..209d1ed28954 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -370,7 +370,11 @@ lookup_protocol:
*/
inet->inet_sport = htons(inet->inet_num);
/* Add to protocol hash chains. */
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
if (sk->sk_prot->init) {
@@ -1091,12 +1095,6 @@ void inet_unregister_protosw(struct inet_protosw *p)
}
EXPORT_SYMBOL(inet_unregister_protosw);
-/*
- * Shall we try to damage output packets if routing dev changes?
- */
-
-int sysctl_ip_dynaddr __read_mostly;
-
static int inet_sk_reselect_saddr(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
@@ -1127,7 +1125,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
if (new_saddr == old_saddr)
return 0;
- if (sysctl_ip_dynaddr > 1) {
+ if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
@@ -1142,8 +1140,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
* Besides that, it does not check for connection
* uniqueness. Wait for troubles.
*/
- __sk_prot_rehash(sk);
- return 0;
+ return __sk_prot_rehash(sk);
}
int inet_sk_rebuild_header(struct sock *sk)
@@ -1183,7 +1180,7 @@ int inet_sk_rebuild_header(struct sock *sk)
* Other protocols have to map its equivalent state to TCP_SYN_SENT.
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
*/
- if (!sysctl_ip_dynaddr ||
+ if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 59b3e0e8fd51..c102eb5ac55c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -735,6 +735,14 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
(!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
goto out;
+ /*
+ * For some 802.11 wireless deployments (and possibly other networks),
+ * there will be an ARP proxy and gratuitous ARP frames are attacks
+ * and thus should not be accepted.
+ */
+ if (sip == tip && IN_DEV_ORCONF(in_dev, DROP_GRATUITOUS_ARP))
+ goto out;
+
/*
* Special case: We must set Frame Relay source Q.922 address
*/
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebd9d31e65a..8c3df2ccba45 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1194,6 +1194,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
__be32 addr = 0;
struct in_device *in_dev;
struct net *net = dev_net(dev);
+ int master_idx;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
@@ -1214,12 +1215,33 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
if (addr)
goto out_unlock;
no_in_dev:
+ master_idx = l3mdev_master_ifindex_rcu(dev);
+
+ /* For VRFs, the VRF device takes the place of the loopback device,
+ * with addresses on it being preferred. Note in such cases the
+ * loopback device will be among the devices that fail the master_idx
+ * equality check in the loop below.
+ */
+ if (master_idx &&
+ (dev = dev_get_by_index_rcu(net, master_idx)) &&
+ (in_dev = __in_dev_get_rcu(dev))) {
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_scope != RT_SCOPE_LINK &&
+ ifa->ifa_scope <= scope) {
+ addr = ifa->ifa_local;
+ goto out_unlock;
+ }
+ } endfor_ifa(in_dev);
+ }
/* Not loopback addresses on loopback should be preferred
in this case. It is important that lo is the first interface
in dev_base list.
*/
for_each_netdev_rcu(net, dev) {
+ if (l3mdev_master_ifindex_rcu(dev) != master_idx)
+ continue;
+
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
continue;
@@ -1847,7 +1869,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
@@ -2185,6 +2207,8 @@ static struct devinet_sysctl_table {
"igmpv3_unsolicited_report_interval"),
DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
"ignore_routes_with_linkdown"),
+ DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
+ "drop_gratuitous_arp"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
@@ -2192,6 +2216,8 @@ static struct devinet_sysctl_table {
"promote_secondaries"),
DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
"route_localnet"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
+ "drop_unicast_in_l2_multicast"),
},
};
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 744e5936c10d..d07fc076bea0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head)
if (!n->tn_bits)
kmem_cache_free(trie_leaf_kmem, n);
- else if (n->tn_bits <= TNODE_KMALLOC_MAX)
- kfree(n);
else
- vfree(n);
+ kvfree(n);
}
#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
@@ -1396,9 +1394,10 @@ found:
struct fib_info *fi = fa->fa_info;
int nhsel, err;
- if ((index >= (1ul << fa->fa_slen)) &&
- ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
- continue;
+ if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
+ if (index >= (1ul << fa->fa_slen))
+ continue;
+ }
if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
continue;
if (fi->fib_dead)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 976f0dcf6991..88dab0c1670c 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -774,7 +774,6 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
uh->dest = e->dport;
uh->source = sport;
uh->len = htons(skb->len);
- uh->check = 0;
udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
fl4->saddr, fl4->daddr, skb->len);
@@ -784,11 +783,11 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, struct flowi4 *fl4)
{
- bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
- int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
+ SKB_GSO_UDP_TUNNEL;
__be16 sport;
- skb = iptunnel_handle_offloads(skb, csum, type);
+ skb = iptunnel_handle_offloads(skb, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -804,8 +803,8 @@ EXPORT_SYMBOL(fou_build_header);
int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, struct flowi4 *fl4)
{
- bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
- int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
+ SKB_GSO_UDP_TUNNEL;
struct guehdr *guehdr;
size_t hdrlen, optlen = 0;
__be16 sport;
@@ -814,7 +813,6 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
- csum = false;
optlen += GUE_PLEN_REMCSUM;
type |= SKB_GSO_TUNNEL_REMCSUM;
need_priv = true;
@@ -822,7 +820,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
optlen += need_priv ? GUE_LEN_PRIV : 0;
- skb = iptunnel_handle_offloads(skb, csum, type);
+ skb = iptunnel_handle_offloads(skb, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 5a8ee3282550..47f4c544c916 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -18,15 +18,13 @@
static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
struct sk_buff *segs = ERR_PTR(-EINVAL);
- netdev_features_t enc_features;
- int ghl;
- struct gre_base_hdr *greh;
u16 mac_offset = skb->mac_header;
- int mac_len = skb->mac_len;
__be16 protocol = skb->protocol;
- int tnl_hlen;
- bool csum;
+ u16 mac_len = skb->mac_len;
+ int gre_offset, outer_hlen;
+ bool need_csum, ufo;
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_TCPV4 |
@@ -43,74 +41,74 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
if (!skb->encapsulation)
goto out;
- if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
+ if (unlikely(tnl_hlen < sizeof(struct gre_base_hdr)))
goto out;
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
-
- ghl = skb_inner_mac_header(skb) - skb_transport_header(skb);
- if (unlikely(ghl < sizeof(*greh)))
+ if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
- csum = !!(greh->flags & GRE_CSUM);
- if (csum)
- skb->encap_hdr_csum = 1;
-
/* setup inner skb. */
- skb->protocol = greh->protocol;
skb->encapsulation = 0;
-
- if (unlikely(!pskb_may_pull(skb, ghl)))
- goto out;
-
- __skb_pull(skb, ghl);
+ __skb_pull(skb, tnl_hlen);
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
+ skb->protocol = skb->inner_protocol;
+
+ need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
+ skb->encap_hdr_csum = need_csum;
+
+ ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+
+ features &= skb->dev->hw_enc_features;
+
+ /* The only checksum offload we care about from here on out is the
+ * outer one so strip the existing checksum feature flags based
+ * on the fact that we will be computing our checksum in software.
+ */
+ if (ufo) {
+ features &= ~NETIF_F_CSUM_MASK;
+ if (!need_csum)
+ features |= NETIF_F_HW_CSUM;
+ }
/* segment inner packet. */
- enc_features = skb->dev->hw_enc_features & features;
- segs = skb_mac_gso_segment(skb, enc_features);
+ segs = skb_mac_gso_segment(skb, features);
if (IS_ERR_OR_NULL(segs)) {
- skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len);
+ skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
+ mac_len);
goto out;
}
+ outer_hlen = skb_tnl_header_len(skb);
+ gre_offset = outer_hlen - tnl_hlen;
skb = segs;
- tnl_hlen = skb_tnl_header_len(skb);
do {
- __skb_push(skb, ghl);
- if (csum) {
- __be32 *pcsum;
-
- if (skb_has_shared_frag(skb)) {
- int err;
-
- err = __skb_linearize(skb);
- if (err) {
- kfree_skb_list(segs);
- segs = ERR_PTR(err);
- goto out;
- }
- }
-
- skb_reset_transport_header(skb);
+ struct gre_base_hdr *greh;
+ __be32 *pcsum;
- greh = (struct gre_base_hdr *)
- skb_transport_header(skb);
- pcsum = (__be32 *)(greh + 1);
- *pcsum = 0;
- *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
+ /* Set up inner headers if we are offloading inner checksum */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
}
- __skb_push(skb, tnl_hlen - ghl);
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
+ skb->mac_len = mac_len;
+ skb->protocol = protocol;
+ __skb_push(skb, outer_hlen);
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
- skb->mac_len = mac_len;
- skb->protocol = protocol;
+ skb_set_transport_header(skb, gre_offset);
+
+ if (!need_csum)
+ continue;
+
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
+ pcsum = (__be32 *)(greh + 1);
+
+ *pcsum = 0;
+ *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
} while ((skb = skb->next));
out:
return segs;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 05e4cba14162..2aea9f1a2a31 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -107,12 +107,6 @@
#include <linux/seq_file.h>
#endif
-#define IP_MAX_MEMBERSHIPS 20
-#define IP_MAX_MSF 10
-
-/* IGMP reports for link-local multicast groups are enabled by default */
-int sysctl_igmp_llm_reports __read_mostly = 1;
-
#ifdef CONFIG_IP_MULTICAST
/* Parameter names and values are taken from igmp-v2-06 draft */
@@ -433,6 +427,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
int type, int gdeleted, int sdeleted)
{
struct net_device *dev = pmc->interface->dev;
+ struct net *net = dev_net(dev);
struct igmpv3_report *pih;
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
@@ -440,7 +435,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
- if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
return skb;
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
@@ -543,6 +538,7 @@ empty_source:
static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
{
struct sk_buff *skb = NULL;
+ struct net *net = dev_net(in_dev->dev);
int type;
if (!pmc) {
@@ -551,7 +547,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(pmc->multiaddr) &&
- !sysctl_igmp_llm_reports)
+ !net->ipv4.sysctl_igmp_llm_reports)
continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -687,7 +683,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
return 0;
if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -766,9 +762,10 @@ static void igmp_ifc_timer_expire(unsigned long data)
static void igmp_ifc_event(struct in_device *in_dev)
{
+ struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv;
+ in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
igmp_ifc_start_timer(in_dev, 1);
}
@@ -858,12 +855,13 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
{
struct ip_mc_list *im;
+ struct net *net = dev_net(in_dev->dev);
/* Timers are only set for non-local groups */
if (group == IGMP_ALL_HOSTS)
return false;
- if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
return false;
rcu_read_lock();
@@ -887,6 +885,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
__be32 group = ih->group;
int max_delay;
int mark = 0;
+ struct net *net = dev_net(in_dev->dev);
if (len == 8) {
@@ -972,7 +971,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !sysctl_igmp_llm_reports)
+ !net->ipv4.sysctl_igmp_llm_reports)
continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
@@ -1088,6 +1087,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
{
struct ip_mc_list *pmc;
+ struct net *net = dev_net(in_dev->dev);
/* this is an "ip_mc_list" for convenience; only the fields below
* are actually used. In particular, the refcnt and users are not
@@ -1102,7 +1102,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
@@ -1187,6 +1187,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
{
struct in_device *in_dev = im->interface;
#ifdef CONFIG_IP_MULTICAST
+ struct net *net = dev_net(in_dev->dev);
int reporter;
#endif
@@ -1198,7 +1199,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
return;
reporter = im->reporter;
@@ -1223,6 +1224,9 @@ static void igmp_group_dropped(struct ip_mc_list *im)
static void igmp_group_added(struct ip_mc_list *im)
{
struct in_device *in_dev = im->interface;
+#ifdef CONFIG_IP_MULTICAST
+ struct net *net = dev_net(in_dev->dev);
+#endif
if (im->loaded == 0) {
im->loaded = 1;
@@ -1232,7 +1236,7 @@ static void igmp_group_added(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
return;
if (in_dev->dead)
@@ -1245,7 +1249,7 @@ static void igmp_group_added(struct ip_mc_list *im)
}
/* else, v3 */
- im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
igmp_ifc_event(in_dev);
#endif
}
@@ -1314,6 +1318,9 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
{
struct ip_mc_list *im;
+#ifdef CONFIG_IP_MULTICAST
+ struct net *net = dev_net(in_dev->dev);
+#endif
ASSERT_RTNL();
@@ -1340,7 +1347,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
- im->unsolicit_count = sysctl_igmp_qrv;
+ im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
#endif
im->next_rcu = in_dev->mc_list;
@@ -1533,6 +1540,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
#ifdef CONFIG_IP_MULTICAST
struct ip_mc_list *im;
int type;
+ struct net *net = dev_net(in_dev->dev);
ASSERT_RTNL();
@@ -1540,7 +1548,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !sysctl_igmp_llm_reports)
+ !net->ipv4.sysctl_igmp_llm_reports)
continue;
/* a failover is happening and switches
@@ -1639,6 +1647,9 @@ void ip_mc_down(struct in_device *in_dev)
void ip_mc_init_dev(struct in_device *in_dev)
{
+#ifdef CONFIG_IP_MULTICAST
+ struct net *net = dev_net(in_dev->dev);
+#endif
ASSERT_RTNL();
#ifdef CONFIG_IP_MULTICAST
@@ -1646,7 +1657,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
(unsigned long)in_dev);
setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
(unsigned long)in_dev);
- in_dev->mr_qrv = sysctl_igmp_qrv;
+ in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
#endif
spin_lock_init(&in_dev->mc_tomb_lock);
@@ -1657,11 +1668,14 @@ void ip_mc_init_dev(struct in_device *in_dev)
void ip_mc_up(struct in_device *in_dev)
{
struct ip_mc_list *pmc;
+#ifdef CONFIG_IP_MULTICAST
+ struct net *net = dev_net(in_dev->dev);
+#endif
ASSERT_RTNL();
#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_qrv = sysctl_igmp_qrv;
+ in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
#endif
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
@@ -1727,11 +1741,6 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
/*
* Join a socket to a group
*/
-int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
-int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
-#ifdef CONFIG_IP_MULTICAST
-int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE;
-#endif
static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
__be32 *psfsrc)
@@ -1756,6 +1765,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
#ifdef CONFIG_IP_MULTICAST
struct in_device *in_dev = pmc->interface;
+ struct net *net = dev_net(in_dev->dev);
#endif
/* no more filters for this source */
@@ -1766,7 +1776,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
@@ -1824,12 +1834,13 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
pmc->sfcount[MCAST_INCLUDE]) {
#ifdef CONFIG_IP_MULTICAST
struct ip_sf_list *psf;
+ struct net *net = dev_net(in_dev->dev);
#endif
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
in_dev->mr_ifc_count = pmc->crcount;
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -1996,6 +2007,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
#ifdef CONFIG_IP_MULTICAST
struct ip_sf_list *psf;
+ struct net *net = dev_net(pmc->interface->dev);
in_dev = pmc->interface;
#endif
@@ -2007,7 +2019,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
in_dev->mr_ifc_count = pmc->crcount;
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2074,7 +2086,7 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
count++;
}
err = -ENOBUFS;
- if (count >= sysctl_igmp_max_memberships)
+ if (count >= net->ipv4.sysctl_igmp_max_memberships)
goto done;
iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
if (!iml)
@@ -2246,7 +2258,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
}
/* else, add a new source to the filter */
- if (psl && psl->sl_count >= sysctl_igmp_max_msf) {
+ if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
err = -ENOBUFS;
goto done;
}
@@ -2919,6 +2931,12 @@ static int __net_init igmp_net_init(struct net *net)
goto out_sock;
}
+ /* Sysctl initialization */
+ net->ipv4.sysctl_igmp_max_memberships = 20;
+ net->ipv4.sysctl_igmp_max_msf = 10;
+ /* IGMP reports for link-local multicast groups are enabled by default */
+ net->ipv4.sysctl_igmp_llm_reports = 1;
+ net->ipv4.sysctl_igmp_qrv = 2;
return 0;
out_sock:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 46b9c887bede..bc5196ea1bdf 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,6 +24,7 @@
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
+#include <net/sock_reuseport.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
- (sk2->sk_state != TCP_TIME_WAIT &&
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -89,161 +91,154 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
+ * We try to allocate an odd port (and leave even ports for connect())
*/
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+ bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
+ struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
+ int ret = 1, attempts = 5, port = snum;
+ int smallest_size = -1, smallest_port;
struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret, attempts = 5;
struct net *net = sock_net(sk);
- int smallest_size = -1, smallest_rover;
+ int i, low, high, attempt_half;
+ struct inet_bind_bucket *tb;
kuid_t uid = sock_i_uid(sk);
- int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
+ u32 remaining, offset;
- local_bh_disable();
- if (!snum) {
- int remaining, rover, low, high;
+ if (port) {
+have_port:
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->port == port)
+ goto tb_found;
+ goto tb_not_found;
+ }
again:
- inet_get_local_port_range(net, &low, &high);
- if (attempt_half) {
- int half = low + ((high - low) >> 1);
-
- if (attempt_half == 1)
- high = half;
- else
- low = half;
- }
- remaining = (high - low) + 1;
- smallest_rover = rover = prandom_u32() % remaining + low;
-
- smallest_size = -1;
- do {
- if (inet_is_local_reserved_port(net, rover))
- goto next_nolock;
- head = &hashinfo->bhash[inet_bhashfn(net, rover,
- hashinfo->bhash_size)];
- spin_lock(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->port == rover) {
- if (((tb->fastreuse > 0 &&
- sk->sk_reuse &&
- sk->sk_state != TCP_LISTEN) ||
- (tb->fastreuseport > 0 &&
- sk->sk_reuseport &&
- uid_eq(tb->fastuid, uid))) &&
- (tb->num_owners < smallest_size || smallest_size == -1)) {
- smallest_size = tb->num_owners;
- smallest_rover = rover;
- }
- if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
- snum = rover;
- goto tb_found;
- }
- goto next;
+ attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
+other_half_scan:
+ inet_get_local_port_range(net, &low, &high);
+ high++; /* [32768, 60999] -> [32768, 61000[ */
+ if (high - low < 4)
+ attempt_half = 0;
+ if (attempt_half) {
+ int half = low + (((high - low) >> 2) << 1);
+
+ if (attempt_half == 1)
+ high = half;
+ else
+ low = half;
+ }
+ remaining = high - low;
+ if (likely(remaining > 1))
+ remaining &= ~1U;
+
+ offset = prandom_u32() % remaining;
+ /* __inet_hash_connect() favors ports having @low parity
+ * We do the opposite to not pollute connect() users.
+ */
+ offset |= 1U;
+ smallest_size = -1;
+ smallest_port = low; /* avoid compiler warning */
+
+other_parity_scan:
+ port = low + offset;
+ for (i = 0; i < remaining; i += 2, port += 2) {
+ if (unlikely(port >= high))
+ port -= remaining;
+ if (inet_is_local_reserved_port(net, port))
+ continue;
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->port == port) {
+ if (((tb->fastreuse > 0 && reuse) ||
+ (tb->fastreuseport > 0 &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
+ (tb->num_owners < smallest_size || smallest_size == -1)) {
+ smallest_size = tb->num_owners;
+ smallest_port = port;
}
- break;
- next:
- spin_unlock(&head->lock);
- next_nolock:
- if (++rover > high)
- rover = low;
- } while (--remaining > 0);
-
- /* Exhausted local port range during search? It is not
- * possible for us to be holding one of the bind hash
- * locks if this test triggers, because if 'remaining'
- * drops to zero, we broke out of the do/while loop at
- * the top level, not from the 'break;' statement.
- */
- ret = 1;
- if (remaining <= 0) {
- if (smallest_size != -1) {
- snum = smallest_rover;
- goto have_snum;
- }
- if (attempt_half == 1) {
- /* OK we now try the upper half of the range */
- attempt_half = 2;
- goto again;
+ if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))
+ goto tb_found;
+ goto next_port;
}
- goto fail;
- }
- /* OK, here is the one we will use. HEAD is
- * non-NULL and we hold it's mutex.
- */
- snum = rover;
- } else {
-have_snum:
- head = &hashinfo->bhash[inet_bhashfn(net, snum,
- hashinfo->bhash_size)];
- spin_lock(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->port == snum)
- goto tb_found;
+ goto tb_not_found;
+next_port:
+ spin_unlock_bh(&head->lock);
+ cond_resched();
+ }
+
+ if (smallest_size != -1) {
+ port = smallest_port;
+ goto have_port;
+ }
+ offset--;
+ if (!(offset & 1))
+ goto other_parity_scan;
+
+ if (attempt_half == 1) {
+ /* OK we now try the upper half of the range */
+ attempt_half = 2;
+ goto other_half_scan;
}
- tb = NULL;
- goto tb_not_found;
+ return ret;
+
+tb_not_found:
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port);
+ if (!tb)
+ goto fail_unlock;
tb_found:
if (!hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
- if (((tb->fastreuse > 0 &&
- sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
+ if (((tb->fastreuse > 0 && reuse) ||
(tb->fastreuseport > 0 &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1) {
+ smallest_size == -1)
goto success;
- } else {
- ret = 1;
- if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
- if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
- (tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size != -1 && --attempts >= 0) {
- spin_unlock(&head->lock);
- goto again;
- }
-
- goto fail_unlock;
+ if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
+ if ((reuse ||
+ (tb->fastreuseport > 0 &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
+ smallest_size != -1 && --attempts >= 0) {
+ spin_unlock_bh(&head->lock);
+ goto again;
}
+ goto fail_unlock;
}
- }
-tb_not_found:
- ret = 1;
- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
- net, head, snum)) == NULL)
- goto fail_unlock;
- if (hlist_empty(&tb->owners)) {
- if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
- tb->fastreuse = 1;
- else
+ if (!reuse)
tb->fastreuse = 0;
+ if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
+ tb->fastreuseport = 0;
+ } else {
+ tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = 1;
tb->fastuid = uid;
- } else
- tb->fastreuseport = 0;
- } else {
- if (tb->fastreuse &&
- (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
- tb->fastreuse = 0;
- if (tb->fastreuseport &&
- (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
+ } else {
tb->fastreuseport = 0;
+ }
}
success:
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, snum);
+ inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
ret = 0;
fail_unlock:
- spin_unlock(&head->lock);
-fail:
- local_bh_enable();
+ spin_unlock_bh(&head->lock);
return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);
@@ -482,10 +477,6 @@ EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
#define AF_INET_FAMILY(fam) true
#endif
-/* Only thing we need from tcp.h */
-extern int sysctl_tcp_synack_retries;
-
-
/* Decide when to expire the request and when to resend SYN-ACK */
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
const int max_retries,
@@ -557,6 +548,7 @@ static void reqsk_timer_handler(unsigned long data)
{
struct request_sock *req = (struct request_sock *)data;
struct sock *sk_listener = req->rsk_listener;
+ struct net *net = sock_net(sk_listener);
struct inet_connection_sock *icsk = inet_csk(sk_listener);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
int qlen, expire = 0, resend = 0;
@@ -566,7 +558,7 @@ static void reqsk_timer_handler(unsigned long data)
if (sk_state_load(sk_listener) != TCP_LISTEN)
goto drop;
- max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+ max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
thresh = max_retries;
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
@@ -737,6 +729,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ int err = -EADDRINUSE;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@ -754,13 +747,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
- return 0;
+ if (likely(!err))
+ return 0;
}
sk->sk_state = TCP_CLOSE;
- return -EADDRINUSE;
+ return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
@@ -789,14 +783,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
reqsk_put(req);
}
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
- struct sock *child)
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+ struct request_sock *req,
+ struct sock *child)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
spin_lock(&queue->rskq_lock);
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_child_forget(sk, req, child);
+ child = NULL;
} else {
req->sk = child;
req->dl_next = NULL;
@@ -808,6 +804,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
sk_acceptq_added(sk);
}
spin_unlock(&queue->rskq_lock);
+ return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
@@ -817,11 +814,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
if (own_req) {
inet_csk_reqsk_queue_drop(sk, req);
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
- inet_csk_reqsk_queue_add(sk, req, child);
- /* Warning: caller must not call reqsk_put(req);
- * child stole last reference on it.
- */
- return child;
+ if (inet_csk_reqsk_queue_add(sk, req, child))
+ return child;
}
/* Too bad, another child took ownership of the request, undo. */
bh_unlock_sock(child);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8bb8e7ad8548..50c0d96b8441 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -357,17 +357,24 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
struct sock *sk;
if (req->sdiag_family == AF_INET)
- sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
+ sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
- else if (req->sdiag_family == AF_INET6)
- sk = inet6_lookup(net, hashinfo,
- (struct in6_addr *)req->id.idiag_dst,
- req->id.idiag_dport,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
- req->id.idiag_if);
+ else if (req->sdiag_family == AF_INET6) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+ ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+ sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
+ req->id.idiag_dport, req->id.idiag_src[3],
+ req->id.idiag_sport, req->id.idiag_if);
+ else
+ sk = inet6_lookup(net, hashinfo, NULL, 0,
+ (struct in6_addr *)req->id.idiag_dst,
+ req->id.idiag_dport,
+ (struct in6_addr *)req->id.idiag_src,
+ req->id.idiag_sport,
+ req->id.idiag_if);
+ }
#endif
else
return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ccc5980797fc..bc68eced0105 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -20,10 +20,12 @@
#include <linux/wait.h>
#include <linux/vmalloc.h>
+#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/sock_reuseport.h>
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
const __u16 lport, const __be32 faddr,
@@ -205,6 +207,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
const __be32 daddr, const unsigned short hnum,
const int dif)
@@ -214,6 +217,7 @@ struct sock *__inet_lookup_listener(struct net *net,
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
int score, hiscore, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 phash = 0;
rcu_read_lock();
@@ -229,6 +233,15 @@ begin:
if (reuseport) {
phash = inet_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+ sk2 = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (sk2) {
+ result = sk2;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == hiscore && reuseport) {
@@ -246,11 +259,13 @@ begin:
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, daddr,
dif) < hiscore)) {
sock_put(result);
+ select_ok = false;
goto begin;
}
}
@@ -449,32 +464,74 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
}
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
-void __inet_hash(struct sock *sk, struct sock *osk)
+static int inet_reuseport_add_sock(struct sock *sk,
+ struct inet_listen_hashbucket *ilb,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
+{
+ struct sock *sk2;
+ struct hlist_nulls_node *node;
+ kuid_t uid = sock_i_uid(sk);
+
+ sk_nulls_for_each_rcu(sk2, node, &ilb->head) {
+ if (sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+ sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
+ saddr_same(sk, sk2, false))
+ return reuseport_add_sock(sk, sk2);
+ }
+
+ /* Initial allocation may have already happened via setsockopt */
+ if (!rcu_access_pointer(sk->sk_reuseport_cb))
+ return reuseport_alloc(sk);
+ return 0;
+}
+
+int __inet_hash(struct sock *sk, struct sock *osk,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
+ int err = 0;
if (sk->sk_state != TCP_LISTEN) {
inet_ehash_nolisten(sk, osk);
- return;
+ return 0;
}
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
spin_lock(&ilb->lock);
+ if (sk->sk_reuseport) {
+ err = inet_reuseport_add_sock(sk, ilb, saddr_same);
+ if (err)
+ goto unlock;
+ }
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+unlock:
spin_unlock(&ilb->lock);
+
+ return err;
}
EXPORT_SYMBOL(__inet_hash);
-void inet_hash(struct sock *sk)
+int inet_hash(struct sock *sk)
{
+ int err = 0;
+
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __inet_hash(sk, NULL);
+ err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
local_bh_enable();
}
+
+ return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
@@ -493,6 +550,8 @@ void inet_unhash(struct sock *sk)
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
spin_lock_bh(lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
done = __sk_nulls_del_node_init_rcu(sk);
if (done)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
@@ -506,106 +565,106 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *, __u16, struct inet_timewait_sock **))
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
- const unsigned short snum = inet_sk(sk)->inet_num;
+ struct inet_timewait_sock *tw = NULL;
struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret;
+ int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
+ struct inet_bind_bucket *tb;
+ u32 remaining, offset;
+ int ret, i, low, high;
+ static u32 hint;
+
+ if (port) {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+ if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+ inet_ehash_nolisten(sk, NULL);
+ spin_unlock_bh(&head->lock);
+ return 0;
+ }
+ spin_unlock(&head->lock);
+ /* No definite answer... Walk to established hash table */
+ ret = check_established(death_row, sk, port, NULL);
+ local_bh_enable();
+ return ret;
+ }
- if (!snum) {
- int i, remaining, low, high, port;
- static u32 hint;
- u32 offset = hint + port_offset;
- struct inet_timewait_sock *tw = NULL;
+ inet_get_local_port_range(net, &low, &high);
+ high++; /* [32768, 60999] -> [32768, 61000[ */
+ remaining = high - low;
+ if (likely(remaining > 1))
+ remaining &= ~1U;
- inet_get_local_port_range(net, &low, &high);
- remaining = (high - low) + 1;
+ offset = (hint + port_offset) % remaining;
+ /* In first pass we try ports of @low parity.
+ * inet_csk_get_port() does the opposite choice.
+ */
+ offset &= ~1U;
+other_parity_scan:
+ port = low + offset;
+ for (i = 0; i < remaining; i += 2, port += 2) {
+ if (unlikely(port >= high))
+ port -= remaining;
+ if (inet_is_local_reserved_port(net, port))
+ continue;
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
- /* By starting with offset being an even number,
- * we tend to leave about 50% of ports for other uses,
- * like bind(0).
+ /* Does not bother with rcv_saddr checks, because
+ * the established check is already unique enough.
*/
- offset &= ~1;
-
- local_bh_disable();
- for (i = 0; i < remaining; i++) {
- port = low + (i + offset) % remaining;
- if (inet_is_local_reserved_port(net, port))
- continue;
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock(&head->lock);
-
- /* Does not bother with rcv_saddr checks,
- * because the established check is already
- * unique enough.
- */
- inet_bind_bucket_for_each(tb, &head->chain) {
- if (net_eq(ib_net(tb), net) &&
- tb->port == port) {
- if (tb->fastreuse >= 0 ||
- tb->fastreuseport >= 0)
- goto next_port;
- WARN_ON(hlist_empty(&tb->owners));
- if (!check_established(death_row, sk,
- port, &tw))
- goto ok;
+ inet_bind_bucket_for_each(tb, &head->chain) {
+ if (net_eq(ib_net(tb), net) && tb->port == port) {
+ if (tb->fastreuse >= 0 ||
+ tb->fastreuseport >= 0)
goto next_port;
- }
+ WARN_ON(hlist_empty(&tb->owners));
+ if (!check_established(death_row, sk,
+ port, &tw))
+ goto ok;
+ goto next_port;
}
-
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
- net, head, port);
- if (!tb) {
- spin_unlock(&head->lock);
- break;
- }
- tb->fastreuse = -1;
- tb->fastreuseport = -1;
- goto ok;
-
- next_port:
- spin_unlock(&head->lock);
}
- local_bh_enable();
-
- return -EADDRNOTAVAIL;
-ok:
- hint += (i + 2) & ~1;
-
- /* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, port);
- if (sk_unhashed(sk)) {
- inet_sk(sk)->inet_sport = htons(port);
- inet_ehash_nolisten(sk, (struct sock *)tw);
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port);
+ if (!tb) {
+ spin_unlock_bh(&head->lock);
+ return -ENOMEM;
}
- if (tw)
- inet_twsk_bind_unhash(tw, hinfo);
- spin_unlock(&head->lock);
+ tb->fastreuse = -1;
+ tb->fastreuseport = -1;
+ goto ok;
+next_port:
+ spin_unlock_bh(&head->lock);
+ cond_resched();
+ }
- if (tw)
- inet_twsk_deschedule_put(tw);
+ offset++;
+ if ((offset & 1) && remaining > 1)
+ goto other_parity_scan;
- ret = 0;
- goto out;
- }
+ return -EADDRNOTAVAIL;
- head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- inet_ehash_nolisten(sk, NULL);
- spin_unlock_bh(&head->lock);
- return 0;
- } else {
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
- ret = check_established(death_row, sk, snum, NULL);
-out:
- local_bh_enable();
- return ret;
+ok:
+ hint += i + 2;
+
+ /* Head lock still held and bh's disabled */
+ inet_bind_hash(sk, tb, port);
+ if (sk_unhashed(sk)) {
+ inet_sk(sk)->inet_sport = htons(port);
+ inet_ehash_nolisten(sk, (struct sock *)tw);
}
+ if (tw)
+ inet_twsk_bind_unhash(tw, hinfo);
+ spin_unlock(&head->lock);
+ if (tw)
+ inet_twsk_deschedule_put(tw);
+ local_bh_enable();
+ return 0;
}
/*
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
deleted file mode 100644
index f17ea49b28fb..000000000000
--- a/net/ipv4/inet_lro.c
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * linux/net/ipv4/inet_lro.c
- *
- * Large Receive Offload (ipv4 / tcp)
- *
- * (C) Copyright IBM Corp. 2007
- *
- * Authors:
- * Jan-Bernd Themann <themann@de.ibm.com>
- * Christoph Raisch <raisch@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#include <linux/module.h>
-#include <linux/if_vlan.h>
-#include <linux/inet_lro.h>
-#include <net/checksum.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
-MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
-
-#define TCP_HDR_LEN(tcph) (tcph->doff << 2)
-#define IP_HDR_LEN(iph) (iph->ihl << 2)
-#define TCP_PAYLOAD_LENGTH(iph, tcph) \
- (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
-
-#define IPH_LEN_WO_OPTIONS 5
-#define TCPH_LEN_WO_OPTIONS 5
-#define TCPH_LEN_W_TIMESTAMP 8
-
-#define LRO_MAX_PG_HLEN 64
-
-#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }
-
-/*
- * Basic tcp checks whether packet is suitable for LRO
- */
-
-static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
- int len, const struct net_lro_desc *lro_desc)
-{
- /* check ip header: don't aggregate padded frames */
- if (ntohs(iph->tot_len) != len)
- return -1;
-
- if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
- return -1;
-
- if (iph->ihl != IPH_LEN_WO_OPTIONS)
- return -1;
-
- if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
- tcph->rst || tcph->syn || tcph->fin)
- return -1;
-
- if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
- return -1;
-
- if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
- tcph->doff != TCPH_LEN_W_TIMESTAMP)
- return -1;
-
- /* check tcp options (only timestamp allowed) */
- if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
- __be32 *topt = (__be32 *)(tcph + 1);
-
- if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP))
- return -1;
-
- /* timestamp should be in right order */
- topt++;
- if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
- ntohl(*topt)))
- return -1;
-
- /* timestamp reply should not be zero */
- topt++;
- if (*topt == 0)
- return -1;
- }
-
- return 0;
-}
-
-static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
-{
- struct iphdr *iph = lro_desc->iph;
- struct tcphdr *tcph = lro_desc->tcph;
- __be32 *p;
- __wsum tcp_hdr_csum;
-
- tcph->ack_seq = lro_desc->tcp_ack;
- tcph->window = lro_desc->tcp_window;
-
- if (lro_desc->tcp_saw_tstamp) {
- p = (__be32 *)(tcph + 1);
- *(p+2) = lro_desc->tcp_rcv_tsecr;
- }
-
- csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
- iph->tot_len = htons(lro_desc->ip_tot_len);
-
- tcph->check = 0;
- tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
- lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
- tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- lro_desc->ip_tot_len -
- IP_HDR_LEN(iph), IPPROTO_TCP,
- lro_desc->data_csum);
-}
-
-static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
-{
- __wsum tcp_csum;
- __wsum tcp_hdr_csum;
- __wsum tcp_ps_hdr_csum;
-
- tcp_csum = ~csum_unfold(tcph->check);
- tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
-
- tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
- len + TCP_HDR_LEN(tcph),
- IPPROTO_TCP, 0);
-
- return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
- tcp_ps_hdr_csum);
-}
-
-static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
- struct iphdr *iph, struct tcphdr *tcph)
-{
- int nr_frags;
- __be32 *ptr;
- u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
-
- nr_frags = skb_shinfo(skb)->nr_frags;
- lro_desc->parent = skb;
- lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
- lro_desc->iph = iph;
- lro_desc->tcph = tcph;
- lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
- lro_desc->tcp_ack = tcph->ack_seq;
- lro_desc->tcp_window = tcph->window;
-
- lro_desc->pkt_aggr_cnt = 1;
- lro_desc->ip_tot_len = ntohs(iph->tot_len);
-
- if (tcph->doff == 8) {
- ptr = (__be32 *)(tcph+1);
- lro_desc->tcp_saw_tstamp = 1;
- lro_desc->tcp_rcv_tsval = *(ptr+1);
- lro_desc->tcp_rcv_tsecr = *(ptr+2);
- }
-
- lro_desc->mss = tcp_data_len;
- lro_desc->active = 1;
-
- lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
- tcp_data_len);
-}
-
-static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
-{
- memset(lro_desc, 0, sizeof(struct net_lro_desc));
-}
-
-static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
- struct tcphdr *tcph, int tcp_data_len)
-{
- struct sk_buff *parent = lro_desc->parent;
- __be32 *topt;
-
- lro_desc->pkt_aggr_cnt++;
- lro_desc->ip_tot_len += tcp_data_len;
- lro_desc->tcp_next_seq += tcp_data_len;
- lro_desc->tcp_window = tcph->window;
- lro_desc->tcp_ack = tcph->ack_seq;
-
- /* don't update tcp_rcv_tsval, would not work with PAWS */
- if (lro_desc->tcp_saw_tstamp) {
- topt = (__be32 *) (tcph + 1);
- lro_desc->tcp_rcv_tsecr = *(topt + 2);
- }
-
- lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
- lro_tcp_data_csum(iph, tcph,
- tcp_data_len),
- parent->len);
-
- parent->len += tcp_data_len;
- parent->data_len += tcp_data_len;
- if (tcp_data_len > lro_desc->mss)
- lro_desc->mss = tcp_data_len;
-}
-
-static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
- struct iphdr *iph, struct tcphdr *tcph)
-{
- struct sk_buff *parent = lro_desc->parent;
- int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
-
- lro_add_common(lro_desc, iph, tcph, tcp_data_len);
-
- skb_pull(skb, (skb->len - tcp_data_len));
- parent->truesize += skb->truesize;
-
- if (lro_desc->last_skb)
- lro_desc->last_skb->next = skb;
- else
- skb_shinfo(parent)->frag_list = skb;
-
- lro_desc->last_skb = skb;
-}
-
-
-static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
- struct iphdr *iph,
- struct tcphdr *tcph)
-{
- if ((lro_desc->iph->saddr != iph->saddr) ||
- (lro_desc->iph->daddr != iph->daddr) ||
- (lro_desc->tcph->source != tcph->source) ||
- (lro_desc->tcph->dest != tcph->dest))
- return -1;
- return 0;
-}
-
-static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
- struct net_lro_desc *lro_arr,
- struct iphdr *iph,
- struct tcphdr *tcph)
-{
- struct net_lro_desc *lro_desc = NULL;
- struct net_lro_desc *tmp;
- int max_desc = lro_mgr->max_desc;
- int i;
-
- for (i = 0; i < max_desc; i++) {
- tmp = &lro_arr[i];
- if (tmp->active)
- if (!lro_check_tcp_conn(tmp, iph, tcph)) {
- lro_desc = tmp;
- goto out;
- }
- }
-
- for (i = 0; i < max_desc; i++) {
- if (!lro_arr[i].active) {
- lro_desc = &lro_arr[i];
- goto out;
- }
- }
-
- LRO_INC_STATS(lro_mgr, no_desc);
-out:
- return lro_desc;
-}
-
-static void lro_flush(struct net_lro_mgr *lro_mgr,
- struct net_lro_desc *lro_desc)
-{
- if (lro_desc->pkt_aggr_cnt > 1)
- lro_update_tcp_ip_header(lro_desc);
-
- skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
-
- if (lro_mgr->features & LRO_F_NAPI)
- netif_receive_skb(lro_desc->parent);
- else
- netif_rx(lro_desc->parent);
-
- LRO_INC_STATS(lro_mgr, flushed);
- lro_clear_desc(lro_desc);
-}
-
-static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
- void *priv)
-{
- struct net_lro_desc *lro_desc;
- struct iphdr *iph;
- struct tcphdr *tcph;
- u64 flags;
- int vlan_hdr_len = 0;
-
- if (!lro_mgr->get_skb_header ||
- lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
- &flags, priv))
- goto out;
-
- if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
- goto out;
-
- lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
- if (!lro_desc)
- goto out;
-
- if ((skb->protocol == htons(ETH_P_8021Q)) &&
- !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
- vlan_hdr_len = VLAN_HLEN;
-
- if (!lro_desc->active) { /* start new lro session */
- if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
- goto out;
-
- skb->ip_summed = lro_mgr->ip_summed_aggr;
- lro_init_desc(lro_desc, skb, iph, tcph);
- LRO_INC_STATS(lro_mgr, aggregated);
- return 0;
- }
-
- if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
- goto out2;
-
- if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
- goto out2;
-
- lro_add_packet(lro_desc, skb, iph, tcph);
- LRO_INC_STATS(lro_mgr, aggregated);
-
- if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
- lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
- lro_flush(lro_mgr, lro_desc);
-
- return 0;
-
-out2: /* send aggregated SKBs to stack */
- lro_flush(lro_mgr, lro_desc);
-
-out:
- return 1;
-}
-
-void lro_receive_skb(struct net_lro_mgr *lro_mgr,
- struct sk_buff *skb,
- void *priv)
-{
- if (__lro_proc_skb(lro_mgr, skb, priv)) {
- if (lro_mgr->features & LRO_F_NAPI)
- netif_receive_skb(skb);
- else
- netif_rx(skb);
- }
-}
-EXPORT_SYMBOL(lro_receive_skb);
-
-void lro_flush_all(struct net_lro_mgr *lro_mgr)
-{
- int i;
- struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
-
- for (i = 0; i < lro_mgr->max_desc; i++) {
- if (lro_desc[i].active)
- lro_flush(lro_mgr, &lro_desc[i]);
- }
-}
-EXPORT_SYMBOL(lro_flush_all);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3f00810b7288..efbd47d1a531 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -54,8 +54,6 @@
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
* as well. Or notify me, at least. --ANK
*/
-
-static int sysctl_ipfrag_max_dist __read_mostly = 64;
static const char ip_frag_cache_name[] = "ip4-frags";
struct ipfrag_skb_cb
@@ -150,7 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
qp->daddr = arg->iph->daddr;
qp->vif = arg->vif;
qp->user = arg->user;
- qp->peer = sysctl_ipfrag_max_dist ?
+ qp->peer = q->net->max_dist ?
inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
NULL;
}
@@ -275,7 +273,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
static int ip_frag_too_far(struct ipq *qp)
{
struct inet_peer *peer = qp->peer;
- unsigned int max = sysctl_ipfrag_max_dist;
+ unsigned int max = qp->q.net->max_dist;
unsigned int start, end;
int rc;
@@ -661,6 +659,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
struct ipq *qp;
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+ skb_orphan(skb);
/* Lookup (or create) queue header */
qp = ip_find(net, ip_hdr(skb), user, vif);
@@ -748,6 +747,14 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "ipfrag_max_dist",
+ .data = &init_net.ipv4.frags.max_dist,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero
+ },
{ }
};
@@ -761,14 +768,6 @@ static struct ctl_table ip4_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
- .procname = "ipfrag_max_dist",
- .data = &sysctl_ipfrag_max_dist,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
- },
{ }
};
@@ -789,10 +788,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
table[1].data = &net->ipv4.frags.low_thresh;
table[1].extra2 = &net->ipv4.frags.high_thresh;
table[2].data = &net->ipv4.frags.timeout;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
+ table[3].data = &net->ipv4.frags.max_dist;
}
hdr = register_net_sysctl(net, "net/ipv4", table);
@@ -864,6 +860,8 @@ static int __net_init ipv4_frags_init_net(struct net *net)
*/
net->ipv4.frags.timeout = IP_FRAG_TIME;
+ net->ipv4.frags.max_dist = 64;
+
res = inet_frags_init_net(&net->ipv4.frags);
if (res)
return res;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7c51c4e1661f..202437d6087b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -238,7 +238,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
return -EINVAL;
}
}
- return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+ return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
}
static void ipgre_err(struct sk_buff *skb, u32 info,
@@ -440,6 +440,17 @@ drop:
return 0;
}
+static __sum16 gre_checksum(struct sk_buff *skb)
+{
+ __wsum csum;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ csum = lco_csum(skb);
+ else
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ return csum_fold(csum);
+}
+
static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
__be16 proto, __be32 key, __be32 seq)
{
@@ -467,8 +478,7 @@ static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
!(skb_shinfo(skb)->gso_type &
(SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
*ptr = 0;
- *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
- skb->len, 0));
+ *(__sum16 *)ptr = gre_checksum(skb);
}
}
}
@@ -493,8 +503,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
bool csum)
{
- return iptunnel_handle_offloads(skb, csum,
- csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
+ return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
static struct rtable *gre_get_rt(struct sk_buff *skb,
@@ -531,9 +540,16 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
- rt = gre_get_rt(skb, dev, &fl, key);
- if (IS_ERR(rt))
- goto err_free_skb;
+ rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) :
+ NULL;
+ if (!rt) {
+ rt = gre_get_rt(skb, dev, &fl, key);
+ if (IS_ERR(rt))
+ goto err_free_skb;
+ if (!skb->mark)
+ dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
+ fl.saddr);
+ }
tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
@@ -1054,8 +1070,9 @@ static const struct net_device_ops gre_tap_netdev_ops = {
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
- dev->netdev_ops = &gre_tap_netdev_ops;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ dev->netdev_ops = &gre_tap_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip_tunnel_setup(dev, gre_tap_net_id);
}
@@ -1240,6 +1257,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
err = ipgre_newlink(net, dev, tb, NULL);
if (err < 0)
goto out;
+
+ /* openvswitch users expect packet sizes to be unrestricted,
+ * so set the largest MTU we can.
+ */
+ err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+ if (err)
+ goto out;
+
return dev;
out:
free_netdev(dev);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b1209b63381f..e3d782746d9d 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -308,15 +308,15 @@ drop:
return true;
}
-int sysctl_ip_early_demux __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_ip_early_demux);
-
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
- if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) {
+ if (net->ipv4.sysctl_ip_early_demux &&
+ !skb_dst(skb) &&
+ !skb->sk &&
+ !ip_is_fragment(iph)) {
const struct net_protocol *ipprot;
int protocol = iph->protocol;
@@ -359,8 +359,31 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
rt = skb_rtable(skb);
if (rt->rt_type == RTN_MULTICAST) {
IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
- } else if (rt->rt_type == RTN_BROADCAST)
+ } else if (rt->rt_type == RTN_BROADCAST) {
IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
+ } else if (skb->pkt_type == PACKET_BROADCAST ||
+ skb->pkt_type == PACKET_MULTICAST) {
+ struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+
+ /* RFC 1122 3.3.6:
+ *
+ * When a host sends a datagram to a link-layer broadcast
+ * address, the IP destination address MUST be a legal IP
+ * broadcast or IP multicast address.
+ *
+ * A host SHOULD silently discard a datagram that is received
+ * via a link-layer broadcast (see Section 2.4) but does not
+ * specify an IP multicast or broadcast destination address.
+ *
+ * This doesn't explicitly say L2 *broadcast*, but broadcast is
+ * in a way a form of multicast and the most common use case for
+ * this is 802.11 protecting against cross-station spoofing (the
+ * so-called "hole-196" attack) so do it for both.
+ */
+ if (in_dev &&
+ IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
+ goto drop;
+ }
return dst_input(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 512a44778cf2..f734c42acdaf 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -79,9 +79,6 @@
#include <linux/netlink.h>
#include <linux/tcp.h>
-int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
-EXPORT_SYMBOL(sysctl_ip_default_ttl);
-
static int
ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
unsigned int mtu,
@@ -239,6 +236,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
* from host network stack.
*/
features = netif_skb_features(skb);
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs)) {
kfree_skb(skb);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5f73a7c03e27..035ad645a8d9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+ /* Our caller is responsible for freeing ipc->opt */
err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
err < 40 ? err : 40);
if (err)
@@ -571,6 +573,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
int val = 0, err;
bool needs_rtnl = setsockopt_needs_rtnl(optname);
@@ -910,7 +913,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
/* numsrc >= (1G-4) overflow in 32 bits */
if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > sysctl_igmp_max_msf) {
+ msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
kfree(msf);
err = -ENOBUFS;
break;
@@ -1065,7 +1068,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
/* numsrc >= (4G-140)/128 overflow in 32 bits */
if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sysctl_igmp_max_msf) {
+ gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
err = -ENOBUFS;
goto mc_msf_out;
}
@@ -1340,10 +1343,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
val = inet->tos;
break;
case IP_TTL:
+ {
+ struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
- sysctl_ip_default_ttl :
+ net->ipv4.sysctl_ip_default_ttl :
inet->uc_ttl);
break;
+ }
case IP_HDRINCL:
val = inet->hdrincl;
break;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index c7bd72e9b544..dff8a05739a2 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -68,61 +68,6 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
IP_TNL_HASH_BITS);
}
-static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
- struct dst_entry *dst, __be32 saddr)
-{
- struct dst_entry *old_dst;
-
- dst_clone(dst);
- old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
- dst_release(old_dst);
- idst->saddr = saddr;
-}
-
-static noinline void tunnel_dst_set(struct ip_tunnel *t,
- struct dst_entry *dst, __be32 saddr)
-{
- __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
-}
-
-static void tunnel_dst_reset(struct ip_tunnel *t)
-{
- tunnel_dst_set(t, NULL, 0);
-}
-
-void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
-{
- int i;
-
- for_each_possible_cpu(i)
- __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
-}
-EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
-
-static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
- u32 cookie, __be32 *saddr)
-{
- struct ip_tunnel_dst *idst;
- struct dst_entry *dst;
-
- rcu_read_lock();
- idst = raw_cpu_ptr(t->dst_cache);
- dst = rcu_dereference(idst->dst);
- if (dst && !atomic_inc_not_zero(&dst->__refcnt))
- dst = NULL;
- if (dst) {
- if (!dst->obsolete || dst->ops->check(dst, cookie)) {
- *saddr = idst->saddr;
- } else {
- tunnel_dst_reset(t);
- dst_release(dst);
- dst = NULL;
- }
- }
- rcu_read_unlock();
- return (struct rtable *)dst;
-}
-
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key)
{
@@ -381,7 +326,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
- tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
+ fl4.saddr);
ip_rt_put(rt);
}
if (dev->type != ARPHRD_ETHER)
@@ -729,7 +675,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
- rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
+ rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
+ NULL;
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -739,7 +686,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
if (connected)
- tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
+ fl4.saddr);
}
if (rt->dst.dev == dev) {
@@ -836,7 +784,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (set_mtu)
dev->mtu = mtu;
}
- ip_tunnel_dst_reset_all(t);
+ dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
}
@@ -943,17 +891,31 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
-int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
- if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
+ if (new_mtu < 68)
return -EINVAL;
+
+ if (new_mtu > max_mtu) {
+ if (strict)
+ return -EINVAL;
+
+ new_mtu = max_mtu;
+ }
+
dev->mtu = new_mtu;
return 0;
}
+EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ return __ip_tunnel_change_mtu(dev, new_mtu, true);
+}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
static void ip_tunnel_dev_free(struct net_device *dev)
@@ -961,7 +923,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
gro_cells_destroy(&tunnel->gro_cells);
- free_percpu(tunnel->dst_cache);
+ dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1155,15 +1117,15 @@ int ip_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
- tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
- if (!tunnel->dst_cache) {
+ err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+ if (err) {
free_percpu(dev->tstats);
- return -ENOMEM;
+ return err;
}
err = gro_cells_init(&tunnel->gro_cells, dev);
if (err) {
- free_percpu(tunnel->dst_cache);
+ dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
return err;
}
@@ -1193,7 +1155,7 @@ void ip_tunnel_uninit(struct net_device *dev)
if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(itn, netdev_priv(dev));
- ip_tunnel_dst_reset_all(tunnel);
+ dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 859d415c0b2d..eaca2449a09a 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -86,7 +86,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);
-int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
+ bool xnet)
{
if (unlikely(!pskb_may_pull(skb, hdr_len)))
return -ENOMEM;
@@ -109,13 +110,10 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
skb->protocol = inner_proto;
}
- nf_reset(skb);
- secpath_reset(skb);
skb_clear_hash_if_not_l4(skb);
- skb_dst_drop(skb);
skb->vlan_tci = 0;
skb_set_queue_mapping(skb, 0);
- skb->pkt_type = PACKET_HOST;
+ skb_scrub_packet(skb, xnet);
return 0;
}
EXPORT_SYMBOL_GPL(iptunnel_pull_header);
@@ -148,7 +146,6 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
- bool csum_help,
int gso_type_mask)
{
int err;
@@ -166,20 +163,15 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
return skb;
}
- /* If packet is not gso and we are resolving any partial checksum,
- * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
- * on the outer header without confusing devices that implement
- * NETIF_F_IP_CSUM with encapsulation.
- */
- if (csum_help)
- skb->encapsulation = 0;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
- err = skb_checksum_help(skb);
- if (unlikely(err))
- goto error;
- } else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_NONE;
+ /* We clear encapsulation here to prevent badly-written
+ * drivers potentially deciding to offload an inner checksum
+ * if we set CHECKSUM_PARTIAL on the outer header.
+ * This should go away when the drivers are all fixed.
+ */
+ skb->encapsulation = 0;
+ }
return skb;
error:
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 67f7c9de0b16..2ed9dd2b5f2f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -143,7 +143,11 @@ static char dhcp_client_identifier[253] __initdata;
/* Persistent data: */
+#ifdef IPCONFIG_DYNAMIC
static int ic_proto_used; /* Protocol used, if any */
+#else
+#define ic_proto_used 0
+#endif
static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
static u8 ic_domain[64]; /* DNS (not NIS) domain name */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4044da61e747..ec51d02166de 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -195,7 +195,7 @@ static int ipip_rcv(struct sk_buff *skb)
if (tunnel) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto))
+ if (iptunnel_pull_header(skb, 0, tpi.proto, false))
goto drop;
return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
@@ -219,7 +219,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(skb->protocol != htons(ETH_P_IP)))
goto tx_error;
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
if (IS_ERR(skb))
goto out;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 5fdc556514ba..7b8fbb352877 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -21,6 +21,7 @@ static struct iphdr *
synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct iphdr *iph;
+ struct net *net = sock_net(skb->sk);
skb_reset_network_header(skb);
iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
@@ -29,7 +30,7 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->tos = 0;
iph->id = 0;
iph->frag_off = htons(IP_DF);
- iph->ttl = sysctl_ip_default_ttl;
+ iph->ttl = net->ipv4.sysctl_ip_default_ttl;
iph->protocol = IPPROTO_TCP;
iph->check = 0;
iph->saddr = saddr;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 6fb869f646bf..a04dee536b8e 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
{
int err;
- skb_orphan(skb);
-
local_bh_disable();
err = ip_defrag(net, skb, user);
local_bh_enable();
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c117b21b937d..76dce90c4581 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -145,10 +145,12 @@ fail:
}
EXPORT_SYMBOL_GPL(ping_get_port);
-void ping_hash(struct sock *sk)
+int ping_hash(struct sock *sk)
{
pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
BUG(); /* "Please do not press this button again." */
+
+ return 0;
}
void ping_unhash(struct sock *sk)
@@ -746,8 +748,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3abd9d7a3adf..9f665b63a927 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -390,7 +390,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\nIp: %d %d",
IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
- sysctl_ip_default_ttl);
+ net->ipv4.sysctl_ip_default_ttl);
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bc35f1842512..8d22de74080c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ static struct raw_hashinfo raw_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
};
-void raw_hash_sk(struct sock *sk)
+int raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_head *head;
@@ -104,6 +104,8 @@ void raw_hash_sk(struct sock *sk)
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(raw_hash_sk);
@@ -547,8 +549,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(net, msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
goto out;
+ }
if (ipc.opt)
free = 1;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 85f184e429c6..02c62299d717 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
+static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
/*
* Interface to generic destination cache.
*/
@@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, 0);
+ 0, jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash;
+ struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+ u32 hval = fnhe_hashfun(daddr);
+
+ spin_lock_bh(&fnhe_lock);
+
+ hash = rcu_dereference_protected(nh->nh_exceptions,
+ lockdep_is_held(&fnhe_lock));
+ hash += hval;
+
+ fnhe_p = &hash->chain;
+ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+ while (fnhe) {
+ if (fnhe->fnhe_daddr == daddr) {
+ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ fnhe_flush_routes(fnhe);
+ kfree_rcu(fnhe, rcu);
+ break;
+ }
+ fnhe_p = &fnhe->fnhe_next;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+ lockdep_is_held(&fnhe_lock));
+ }
+
+ spin_unlock_bh(&fnhe_lock);
+}
+
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buff *skb,
fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
- if (fnhe)
+ if (fnhe) {
rth = rcu_dereference(fnhe->fnhe_rth_input);
- else
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(&FIB_RES_NH(*res), daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
+ }
+ }
+
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+rt_cache:
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
struct fib_nh *nh = &FIB_RES_NH(*res);
fnhe = find_exception(nh, fl4->daddr);
- if (fnhe)
+ if (fnhe) {
prth = &fnhe->fnhe_rth_output;
- else {
- if (unlikely(fl4->flowi4_flags &
- FLOWI_FLAG_KNOWN_NH &&
- !(nh->nh_gw &&
- nh->nh_scope == RT_SCOPE_LINK))) {
- do_cache = false;
- goto add;
+ rth = rcu_dereference(*prth);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(nh, fl4->daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
}
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
}
+
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
+ }
+ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
rth = rcu_dereference(*prth);
+
+rt_cache:
if (rt_cache_valid(rth)) {
dst_hold(&rth->dst);
return rth;
@@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_device *in_dev)
}
#ifdef CONFIG_SYSCTL
-static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 643a86c49020..ba0dcffada3b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,8 +19,6 @@
#include <net/tcp.h>
#include <net/route.h>
-extern int sysctl_tcp_syncookies;
-
static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
#define COOKIEBITS 24 /* Upper bits store count */
@@ -307,7 +305,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
__u8 rcv_wscale;
struct flowi4 fl4;
- if (!sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 46ce410703b1..1e1fe6086dd9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -24,7 +24,6 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
-#include <net/tcp_memcontrol.h>
static int zero;
static int one = 1;
@@ -284,31 +283,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "ip_default_ttl",
- .data = &sysctl_ip_default_ttl,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &ip_ttl_min,
- .extra2 = &ip_ttl_max,
- },
- {
- .procname = "tcp_syn_retries",
- .data = &sysctl_tcp_syn_retries,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &tcp_syn_retries_min,
- .extra2 = &tcp_syn_retries_max
- },
- {
- .procname = "tcp_synack_retries",
- .data = &sysctl_tcp_synack_retries,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_orphans",
.data = &sysctl_tcp_max_orphans,
.maxlen = sizeof(int),
@@ -323,51 +297,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "ip_early_demux",
- .data = &sysctl_ip_early_demux,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "ip_dynaddr",
- .data = &sysctl_ip_dynaddr,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_retries1",
- .data = &sysctl_tcp_retries1,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra2 = &tcp_retr1_max
- },
- {
- .procname = "tcp_retries2",
- .data = &sysctl_tcp_retries2,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_fin_timeout",
- .data = &sysctl_tcp_fin_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
-#ifdef CONFIG_SYN_COOKIES
- {
- .procname = "tcp_syncookies",
- .data = &sysctl_tcp_syncookies,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
-#endif
- {
.procname = "tcp_fastopen",
.data = &sysctl_tcp_fastopen,
.maxlen = sizeof(int),
@@ -416,30 +345,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "igmp_max_memberships",
- .data = &sysctl_igmp_max_memberships,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "igmp_max_msf",
- .data = &sysctl_igmp_max_msf,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
-#ifdef CONFIG_IP_MULTICAST
- {
- .procname = "igmp_qrv",
- .data = &sysctl_igmp_qrv,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
-#endif
- {
.procname = "inet_peer_threshold",
.data = &inet_peer_threshold,
.maxlen = sizeof(int),
@@ -461,13 +366,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
- .procname = "tcp_orphan_retries",
- .data = &sysctl_tcp_orphan_retries,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_fack",
.data = &sysctl_tcp_fack,
.maxlen = sizeof(int),
@@ -482,13 +380,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "tcp_reordering",
- .data = &sysctl_tcp_reordering,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_reordering",
.data = &sysctl_tcp_max_reordering,
.maxlen = sizeof(int),
@@ -518,13 +409,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &one,
},
{
- .procname = "tcp_notsent_lowat",
- .data = &sysctl_tcp_notsent_lowat,
- .maxlen = sizeof(sysctl_tcp_notsent_lowat),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
.procname = "tcp_rmem",
.data = &sysctl_tcp_rmem,
.maxlen = sizeof(sysctl_tcp_rmem),
@@ -846,6 +730,29 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "ip_dynaddr",
+ .data = &init_net.ipv4.sysctl_ip_dynaddr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "ip_early_demux",
+ .data = &init_net.ipv4.sysctl_ip_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "ip_default_ttl",
+ .data = &init_net.ipv4.sysctl_ip_default_ttl,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &ip_ttl_min,
+ .extra2 = &ip_ttl_max,
+ },
+ {
.procname = "ip_local_port_range",
.maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
.data = &init_net.ipv4.ip_local_ports.range,
@@ -935,12 +842,36 @@ static struct ctl_table ipv4_net_table[] = {
},
{
.procname = "igmp_link_local_mcast_reports",
- .data = &sysctl_igmp_llm_reports,
+ .data = &init_net.ipv4.sysctl_igmp_llm_reports,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "igmp_max_memberships",
+ .data = &init_net.ipv4.sysctl_igmp_max_memberships,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
+ .procname = "igmp_max_msf",
+ .data = &init_net.ipv4.sysctl_igmp_max_msf,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+#ifdef CONFIG_IP_MULTICAST
+ {
+ .procname = "igmp_qrv",
+ .data = &init_net.ipv4.sysctl_igmp_qrv,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+#endif
+ {
.procname = "tcp_keepalive_time",
.data = &init_net.ipv4.sysctl_tcp_keepalive_time,
.maxlen = sizeof(int),
@@ -961,6 +892,74 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "tcp_syn_retries",
+ .data = &init_net.ipv4.sysctl_tcp_syn_retries,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_syn_retries_min,
+ .extra2 = &tcp_syn_retries_max
+ },
+ {
+ .procname = "tcp_synack_retries",
+ .data = &init_net.ipv4.sysctl_tcp_synack_retries,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+#ifdef CONFIG_SYN_COOKIES
+ {
+ .procname = "tcp_syncookies",
+ .data = &init_net.ipv4.sysctl_tcp_syncookies,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+#endif
+ {
+ .procname = "tcp_reordering",
+ .data = &init_net.ipv4.sysctl_tcp_reordering,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_retries1",
+ .data = &init_net.ipv4.sysctl_tcp_retries1,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra2 = &tcp_retr1_max
+ },
+ {
+ .procname = "tcp_retries2",
+ .data = &init_net.ipv4.sysctl_tcp_retries2,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_orphan_retries",
+ .data = &init_net.ipv4.sysctl_tcp_orphan_retries,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_fin_timeout",
+ .data = &init_net.ipv4.sysctl_tcp_fin_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "tcp_notsent_lowat",
+ .data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
@@ -989,6 +988,10 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!net->ipv4.sysctl_local_reserved_ports)
goto err_ports;
+ net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
+ net->ipv4.sysctl_ip_dynaddr = 0;
+ net->ipv4.sysctl_ip_early_demux = 1;
+
return 0;
err_ports:
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7bb1b091efd1..f9faadb42485 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,10 +279,9 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#include <asm/unaligned.h>
#include <net/busy_poll.h>
-int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
-
int sysctl_tcp_min_tso_segs __read_mostly = 2;
int sysctl_tcp_autocorking __read_mostly = 1;
@@ -405,7 +404,7 @@ void tcp_init_sock(struct sock *sk)
tp->mss_cache = TCP_MSS_DEFAULT;
u64_stats_init(&tp->syncp);
- tp->reordering = sysctl_tcp_reordering;
+ tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
tcp_enable_early_retrans(tp);
tcp_assign_congestion_control(sk);
@@ -422,7 +421,8 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
- sock_update_memcg(sk);
+ if (mem_cgroup_sockets_enabled)
+ sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
local_bh_enable();
}
@@ -938,7 +938,7 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+ if (!can_coalesce && i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1211,7 +1211,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i == MAX_SKB_FRAGS || !sg) {
+ if (i == sysctl_max_skb_frags || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1464,8 +1464,10 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
offset = seq - TCP_SKB_CB(skb)->seq;
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+ if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+ pr_err_once("%s: found a SYN, please report !\n", __func__);
offset--;
+ }
if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
*off = offset;
return skb;
@@ -1655,8 +1657,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
break;
offset = *seq - TCP_SKB_CB(skb)->seq;
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+ if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+ pr_err_once("%s: found a SYN, please report !\n", __func__);
offset--;
+ }
if (offset < skb->len)
goto found_ok_skb;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -2324,6 +2328,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
int val;
int err = 0;
@@ -2520,7 +2525,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_LINGER2:
if (val < 0)
tp->linger2 = -1;
- else if (val > sysctl_tcp_fin_timeout / HZ)
+ else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
tp->linger2 = 0;
else
tp->linger2 = val * HZ;
@@ -2637,6 +2642,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
+ int notsent_bytes;
+ u64 rate64;
u32 rate;
memset(info, 0, sizeof(*info));
@@ -2702,18 +2709,25 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_total_retrans = tp->total_retrans;
rate = READ_ONCE(sk->sk_pacing_rate);
- info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ put_unaligned(rate64, &info->tcpi_pacing_rate);
rate = READ_ONCE(sk->sk_max_pacing_rate);
- info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ put_unaligned(rate64, &info->tcpi_max_pacing_rate);
do {
start = u64_stats_fetch_begin_irq(&tp->syncp);
- info->tcpi_bytes_acked = tp->bytes_acked;
- info->tcpi_bytes_received = tp->bytes_received;
+ put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+ put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
info->tcpi_segs_out = tp->segs_out;
info->tcpi_segs_in = tp->segs_in;
+
+ notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
+ info->tcpi_notsent_bytes = max(0, notsent_bytes);
+
+ info->tcpi_min_rtt = tcp_min_rtt(tp);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2722,6 +2736,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
int val, len;
if (get_user(len, optlen))
@@ -2756,12 +2771,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
+ val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
break;
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
- val = (val ? : sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@ -2945,7 +2960,7 @@ static void __tcp_alloc_md5sig_pool(void)
struct crypto_hash *hash;
hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(hash))
+ if (IS_ERR(hash))
return;
per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 55be6ac70cff..fdb286ddba04 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -124,6 +124,41 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
return false;
}
+
+/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
+ * queue this additional data / FIN.
+ */
+void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
+ return;
+
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ skb_dst_drop(skb);
+ __skb_pull(skb, tcp_hdrlen(skb));
+ skb_set_owner_r(skb, sk);
+
+ TCP_SKB_CB(skb)->seq++;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
+
+ tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tp->syn_data_acked = 1;
+
+ /* u64_stats_update_begin(&tp->syncp) not needed here,
+ * as we certainly are not changing upper 32bit value (0)
+ */
+ tp->bytes_received = skb->len;
+
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ tcp_fin(sk);
+}
+
static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sk_buff *skb,
struct dst_entry *dst,
@@ -132,7 +167,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct tcp_sock *tp;
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
struct sock *child;
- u32 end_seq;
bool own_req;
req->num_retrans = 0;
@@ -178,35 +212,11 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
tcp_init_metrics(child);
tcp_init_buffer_space(child);
- /* Queue the data carried in the SYN packet.
- * We used to play tricky games with skb_get().
- * With lockless listener, it is a dead end.
- * Do not think about it.
- *
- * XXX (TFO) - we honor a zero-payload TFO request for now,
- * (any reason not to?) but no need to queue the skb since
- * there is no data. How about SYN+FIN?
- */
- end_seq = TCP_SKB_CB(skb)->end_seq;
- if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
- struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
-
- if (likely(skb2)) {
- skb_dst_drop(skb2);
- __skb_pull(skb2, tcp_hdrlen(skb));
- skb_set_owner_r(skb2, child);
- __skb_queue_tail(&child->sk_receive_queue, skb2);
- tp->syn_data_acked = 1;
-
- /* u64_stats_update_begin(&tp->syncp) not needed here,
- * as we certainly are not changing upper 32bit value (0)
- */
- tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
- } else {
- end_seq = TCP_SKB_CB(skb)->seq + 1;
- }
- }
- tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
+ tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+
+ tcp_fastopen_add_skb(child, skb);
+
+ tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
/* tcp_conn_request() is sending the SYNACK,
* and queues the child into listener accept queue.
*/
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0003d409fec5..e6e65f79ade8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -80,9 +80,7 @@ int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
-int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
int sysctl_tcp_max_reordering __read_mostly = 300;
-EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
@@ -126,6 +124,10 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
+#define REXMIT_NONE 0 /* no loss recovery to do */
+#define REXMIT_LOST 1 /* retransmit packets marked lost */
+#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */
+
/* Adapt the MSS value used to make delayed ack decision to the
* real world.
*/
@@ -1210,6 +1212,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
sacked |= TCPCB_SACKED_ACKED;
state->flag |= FLAG_DATA_SACKED;
tp->sacked_out += pcount;
+ tp->delivered += pcount; /* Out-of-order packets delivered */
fack_count += pcount;
@@ -1821,8 +1824,12 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
static void tcp_add_reno_sack(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 prior_sacked = tp->sacked_out;
+
tp->sacked_out++;
tcp_check_reno_reordering(sk, 0);
+ if (tp->sacked_out > prior_sacked)
+ tp->delivered++; /* Some out-of-order packet is delivered */
tcp_verify_left_out(tp);
}
@@ -1834,6 +1841,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked)
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
+ tp->delivered += max_t(int, acked - tp->sacked_out, 1);
if (acked - 1 >= tp->sacked_out)
tp->sacked_out = 0;
else
@@ -1873,6 +1881,7 @@ void tcp_enter_loss(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct sk_buff *skb;
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
bool is_reneg; /* is receiver reneging on SACKs? */
@@ -1923,9 +1932,9 @@ void tcp_enter_loss(struct sock *sk)
* suggests that the degree of reordering is over-estimated.
*/
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= sysctl_tcp_reordering)
+ tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- sysctl_tcp_reordering);
+ net->ipv4.sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
@@ -2109,6 +2118,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
__u32 packets_out;
+ int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
/* Trick#1: The loss is proven. */
if (tp->lost_out)
@@ -2123,7 +2133,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
*/
packets_out = tp->packets_out;
if (packets_out <= tp->reordering &&
- tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
+ tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) &&
!tcp_may_send_now(sk)) {
/* We have nothing to send. This connection is limited
* either by receiver window or by application.
@@ -2164,8 +2174,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int cnt, oldcnt;
- int err;
+ int cnt, oldcnt, lost;
unsigned int mss;
/* Use SACK to deduce losses of new sequences sent during recovery */
const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
@@ -2205,9 +2214,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
break;
mss = tcp_skb_mss(skb);
- err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
- mss, GFP_ATOMIC);
- if (err < 0)
+ /* If needed, chop off the prefix to mark as lost. */
+ lost = (packets - oldcnt) * mss;
+ if (lost < skb->len &&
+ tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
break;
cnt = packets;
}
@@ -2366,8 +2376,6 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
tp->snd_ssthresh = tp->prior_ssthresh;
tcp_ecn_withdraw_cwr(tp);
}
- } else {
- tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
}
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->undo_marker = 0;
@@ -2469,14 +2477,12 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
tcp_ecn_queue_cwr(tp);
}
-static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
- int fast_rexmit, int flag)
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+ int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
int sndcnt = 0;
int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
- int newly_acked_sacked = prior_unsacked -
- (tp->packets_out - tp->sacked_out);
if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
return;
@@ -2494,7 +2500,8 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
} else {
sndcnt = min(delta, newly_acked_sacked);
}
- sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+ /* Force a fast retransmit upon entering fast recovery */
+ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}
@@ -2539,7 +2546,7 @@ static void tcp_try_keep_open(struct sock *sk)
}
}
-static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
+static void tcp_try_to_open(struct sock *sk, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2553,8 +2560,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
- } else {
- tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
}
}
@@ -2664,7 +2669,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
* recovered or spurious. Otherwise retransmits more on partial ACKs.
*/
-static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+ int *rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
@@ -2686,10 +2692,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
tp->frto = 0; /* Step 3.a. loss was real */
} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
tp->high_seq = tp->snd_nxt;
- __tcp_push_pending_frames(sk, tcp_current_mss(sk),
- TCP_NAGLE_OFF);
- if (after(tp->snd_nxt, tp->high_seq))
- return; /* Step 2.b */
+ /* Step 2.b. Try send new data (but deferred until cwnd
+ * is updated in tcp_ack()). Otherwise fall back to
+ * the conventional recovery.
+ */
+ if (tcp_send_head(sk) &&
+ after(tcp_wnd_end(tp), tp->snd_nxt)) {
+ *rexmit = REXMIT_NEW;
+ return;
+ }
tp->frto = 0;
}
}
@@ -2708,12 +2719,11 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
- tcp_xmit_retransmit_queue(sk);
+ *rexmit = REXMIT_LOST;
}
/* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked,
- const int prior_unsacked, int flag)
+static bool tcp_try_undo_partial(struct sock *sk, const int acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2728,10 +2738,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
* can undo. Otherwise we clock out new packets but do not
* mark more packets lost or retransmit more.
*/
- if (tp->retrans_out) {
- tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
+ if (tp->retrans_out)
return true;
- }
if (!tcp_any_retrans_done(sk))
tp->retrans_stamp = 0;
@@ -2750,21 +2758,21 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
* taking into account both packets sitting in receiver's buffer and
* packets lost by network.
*
- * Besides that it does CWND reduction, when packet loss is detected
- * and changes state of machine.
+ * Besides that it updates the congestion state when packet loss or ECN
+ * is detected. But it does not reduce the cwnd, it is done by the
+ * congestion control later.
*
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const int acked,
- const int prior_unsacked,
- bool is_dupack, int flag)
+ bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ int fast_rexmit = 0, flag = *ack_flag;
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
- int fast_rexmit = 0;
if (WARN_ON(!tp->packets_out && tp->sacked_out))
tp->sacked_out = 0;
@@ -2811,8 +2819,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
/* Use RACK to detect loss */
if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
- tcp_rack_mark_lost(sk))
+ tcp_rack_mark_lost(sk)) {
flag |= FLAG_LOST_RETRANS;
+ *ack_flag |= FLAG_LOST_RETRANS;
+ }
/* E. Process state. */
switch (icsk->icsk_ca_state) {
@@ -2821,7 +2831,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag))
+ if (tcp_try_undo_partial(sk, acked))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
@@ -2833,7 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
}
break;
case TCP_CA_Loss:
- tcp_process_loss(sk, flag, is_dupack);
+ tcp_process_loss(sk, flag, is_dupack, rexmit);
if (icsk->icsk_ca_state != TCP_CA_Open &&
!(flag & FLAG_LOST_RETRANS))
return;
@@ -2850,7 +2860,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk, flag)) {
- tcp_try_to_open(sk, flag, prior_unsacked);
+ tcp_try_to_open(sk, flag);
return;
}
@@ -2872,8 +2882,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
- tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag);
- tcp_xmit_retransmit_queue(sk);
+ *rexmit = REXMIT_LOST;
}
/* Kathleen Nichols' algorithm for tracking the minimum value of
@@ -2898,7 +2907,10 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
struct rtt_meas *m = tcp_sk(sk)->rtt_min;
- struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+ struct rtt_meas rttm = {
+ .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
+ .ts = now,
+ };
u32 elapsed;
/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
@@ -3095,7 +3107,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
* arrived at the other end.
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una,
+ u32 prior_snd_una, int *acked,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3153,10 +3165,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
flag |= FLAG_ORIG_SACK_ACKED;
}
- if (sacked & TCPCB_SACKED_ACKED)
+ if (sacked & TCPCB_SACKED_ACKED) {
tp->sacked_out -= acked_pcount;
- else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb))
- tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
+ } else if (tcp_is_sack(tp)) {
+ tp->delivered += acked_pcount;
+ if (!tcp_skb_spurious_retrans(tp, skb))
+ tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
+ }
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
@@ -3265,6 +3280,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
}
}
#endif
+ *acked = pkts_acked;
return flag;
}
@@ -3298,21 +3314,36 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
/* Decide wheather to run the increase function of congestion control. */
static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
- if (tcp_in_cwnd_reduction(sk))
- return false;
-
/* If reordering is high then always grow cwnd whenever data is
* delivered regardless of its ordering. Otherwise stay conservative
* and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
}
+/* The "ultimate" congestion control function that aims to replace the rigid
+ * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction).
+ * It's called toward the end of processing an ACK with precise rate
+ * information. All transmission or retransmission are delayed afterwards.
+ */
+static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
+ int flag)
+{
+ if (tcp_in_cwnd_reduction(sk)) {
+ /* Reduce cwnd if state mandates */
+ tcp_cwnd_reduction(sk, acked_sacked, flag);
+ } else if (tcp_may_raise_cwnd(sk, flag)) {
+ /* Advance cwnd if state allows */
+ tcp_cong_avoid(sk, ack, acked_sacked);
+ }
+ tcp_update_pacing_rate(sk);
+}
+
/* Check that window update is acceptable.
* The function assumes that snd_una<=ack<=snd_next.
*/
@@ -3508,6 +3539,27 @@ static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
icsk->icsk_ca_ops->in_ack_event(sk, flags);
}
+/* Congestion control has updated the cwnd already. So if we're in
+ * loss recovery then now we do any new sends (for FRTO) or
+ * retransmits (for CA_Loss or CA_recovery) that make sense.
+ */
+static void tcp_xmit_recovery(struct sock *sk, int rexmit)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (rexmit == REXMIT_NONE)
+ return;
+
+ if (unlikely(rexmit == 2)) {
+ __tcp_push_pending_frames(sk, tcp_current_mss(sk),
+ TCP_NAGLE_OFF);
+ if (after(tp->snd_nxt, tp->high_seq))
+ return;
+ tp->frto = 0;
+ }
+ tcp_xmit_retransmit_queue(sk);
+}
+
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
@@ -3520,8 +3572,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
bool is_dupack = false;
u32 prior_fackets;
int prior_packets = tp->packets_out;
- const int prior_unsacked = tp->packets_out - tp->sacked_out;
+ u32 prior_delivered = tp->delivered;
int acked = 0; /* Number of packets newly acked */
+ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
sack_state.first_sackt.v64 = 0;
@@ -3610,23 +3663,16 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- acked = tp->packets_out;
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
+ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state);
- acked -= tp->packets_out;
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, prior_unsacked,
- is_dupack, flag);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
}
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
- /* Advance cwnd if state allows */
- if (tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, acked);
-
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
struct dst_entry *dst = __sk_dst_get(sk);
if (dst)
@@ -3635,14 +3681,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
- tcp_update_pacing_rate(sk);
+ tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
+ tcp_xmit_recovery(sk, rexmit);
return 1;
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, prior_unsacked,
- is_dupack, flag);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3665,8 +3711,8 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, prior_unsacked,
- is_dupack, flag);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_xmit_recovery(sk, rexmit);
}
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3997,7 +4043,7 @@ void tcp_reset(struct sock *sk)
*
* If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
*/
-static void tcp_fin(struct sock *sk)
+void tcp_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -5511,6 +5557,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tp->syn_data_acked = tp->syn_data;
if (tp->syn_data_acked)
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+
+ tcp_fastopen_add_skb(sk, synack);
+
return false;
}
@@ -6117,9 +6166,10 @@ static bool tcp_syn_flood_action(const struct sock *sk,
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
bool want_cookie = false;
+ struct net *net = sock_net(sk);
#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies) {
+ if (net->ipv4.sysctl_tcp_syncookies) {
msg = "Sending cookies";
want_cookie = true;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6128,7 +6178,7 @@ static bool tcp_syn_flood_action(const struct sock *sk,
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
if (!queue->synflood_warned &&
- sysctl_tcp_syncookies != 2 &&
+ net->ipv4.sysctl_tcp_syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, ntohs(tcp_hdr(skb)->dest), msg);
@@ -6161,6 +6211,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
struct dst_entry *dst = NULL;
struct request_sock *req;
@@ -6171,7 +6222,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if ((sysctl_tcp_syncookies == 2 ||
+ if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
if (!want_cookie)
@@ -6237,7 +6288,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
}
}
/* Kill the following clause, if you dislike this way. */
- else if (!sysctl_tcp_syncookies &&
+ else if (!net->ipv4.sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst, false,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 65947c1f4733..4c8d58dfac9b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,7 +73,6 @@
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
-#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
#include <linux/inet.h>
@@ -312,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
-void tcp_req_err(struct sock *sk, u32 seq)
+void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
struct request_sock *req = inet_reqsk(sk);
struct net *net = sock_net(sk);
@@ -324,7 +323,7 @@ void tcp_req_err(struct sock *sk, u32 seq)
if (seq != tcp_rsk(req)->snt_isn) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- } else {
+ } else if (abort) {
/*
* Still in SYN_RECV, just remove it silently.
* There is no good way to pass the error to the newly
@@ -384,7 +383,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
seq = ntohl(th->seq);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq,
+ type == ICMP_PARAMETERPROB ||
+ type == ICMP_TIME_EXCEEDED ||
+ (type == ICMP_DEST_UNREACH &&
+ (code == ICMP_NET_UNREACH ||
+ code == ICMP_HOST_UNREACH)));
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@ -638,8 +642,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = __inet_lookup_listener(net,
- &tcp_hashinfo, ip_hdr(skb)->saddr,
+ sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
+ ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
ntohs(th->source), inet_iif(skb));
/* don't send rst if it can't find key */
@@ -708,7 +712,8 @@ release_sk1:
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+ struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
@@ -723,7 +728,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
];
} rep;
struct ip_reply_arg arg;
- struct net *net = dev_net(skb_dst(skb)->dev);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -785,7 +789,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v4_send_ack(sock_net(sk), skb,
+ tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
@@ -804,8 +809,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+ tcp_sk(sk)->snd_nxt;
+
+ tcp_v4_send_ack(sock_net(sk), skb, seq,
tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp,
req->ts_recent,
@@ -858,7 +865,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
kfree(inet_rsk(req)->opt);
}
-
#ifdef CONFIG_TCP_MD5SIG
/*
* RFC2385 MD5 checksumming requires a mapping of
@@ -1580,7 +1586,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->sacked = 0;
lookup:
- sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+ sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+ th->dest);
if (!sk)
goto no_tcp_socket;
@@ -1590,28 +1597,30 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@ -1694,7 +1703,8 @@ do_time_wait:
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
- &tcp_hashinfo,
+ &tcp_hashinfo, skb,
+ __tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb));
@@ -1818,7 +1828,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
- sock_release_memcg(sk);
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -2342,11 +2354,6 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_MEMCG_KMEM
- .init_cgroup = tcp_init_cgroup,
- .destroy_cgroup = tcp_destroy_cgroup,
- .proto_cgroup = tcp_proto_cgroup,
-#endif
.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);
@@ -2389,6 +2396,16 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
+ net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
+ net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
+ net->ipv4.sysctl_tcp_syncookies = 1;
+ net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
+ net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
+ net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
+ net->ipv4.sysctl_tcp_orphan_retries = 0;
+ net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+ net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
+
return 0;
fail:
tcp_sk_exit(net);
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
deleted file mode 100644
index 2379c1b4efb2..000000000000
--- a/net/ipv4/tcp_memcontrol.c
+++ /dev/null
@@ -1,233 +0,0 @@
-#include <net/tcp.h>
-#include <net/tcp_memcontrol.h>
-#include <net/sock.h>
-#include <net/ip.h>
-#include <linux/nsproxy.h>
-#include <linux/memcontrol.h>
-#include <linux/module.h>
-
-int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
- /*
- * The root cgroup does not use page_counters, but rather,
- * rely on the data already collected by the network
- * subsystem
- */
- struct mem_cgroup *parent = parent_mem_cgroup(memcg);
- struct page_counter *counter_parent = NULL;
- struct cg_proto *cg_proto, *parent_cg;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return 0;
-
- cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
- cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
- cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
- cg_proto->memory_pressure = 0;
- cg_proto->memcg = memcg;
-
- parent_cg = tcp_prot.proto_cgroup(parent);
- if (parent_cg)
- counter_parent = &parent_cg->memory_allocated;
-
- page_counter_init(&cg_proto->memory_allocated, counter_parent);
- percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
-
- return 0;
-}
-EXPORT_SYMBOL(tcp_init_cgroup);
-
-void tcp_destroy_cgroup(struct mem_cgroup *memcg)
-{
- struct cg_proto *cg_proto;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return;
-
- percpu_counter_destroy(&cg_proto->sockets_allocated);
-
- if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
- static_key_slow_dec(&memcg_socket_limit_enabled);
-
-}
-EXPORT_SYMBOL(tcp_destroy_cgroup);
-
-static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
-{
- struct cg_proto *cg_proto;
- int i;
- int ret;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return -EINVAL;
-
- ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
- if (ret)
- return ret;
-
- for (i = 0; i < 3; i++)
- cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
- sysctl_tcp_mem[i]);
-
- if (nr_pages == PAGE_COUNTER_MAX)
- clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- else {
- /*
- * The active bit needs to be written after the static_key
- * update. This is what guarantees that the socket activation
- * function is the last one to run. See sock_update_memcg() for
- * details, and note that we don't mark any socket as belonging
- * to this memcg until that flag is up.
- *
- * We need to do this, because static_keys will span multiple
- * sites, but we can't control their order. If we mark a socket
- * as accounted, but the accounting functions are not patched in
- * yet, we'll lose accounting.
- *
- * We never race with the readers in sock_update_memcg(),
- * because when this value change, the code to process it is not
- * patched in yet.
- *
- * The activated bit is used to guarantee that no two writers
- * will do the update in the same memcg. Without that, we can't
- * properly shutdown the static key.
- */
- if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
- static_key_slow_inc(&memcg_socket_limit_enabled);
- set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- }
-
- return 0;
-}
-
-enum {
- RES_USAGE,
- RES_LIMIT,
- RES_MAX_USAGE,
- RES_FAILCNT,
-};
-
-static DEFINE_MUTEX(tcp_limit_mutex);
-
-static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
- unsigned long nr_pages;
- int ret = 0;
-
- buf = strstrip(buf);
-
- switch (of_cft(of)->private) {
- case RES_LIMIT:
- /* see memcontrol.c */
- ret = page_counter_memparse(buf, "-1", &nr_pages);
- if (ret)
- break;
- mutex_lock(&tcp_limit_mutex);
- ret = tcp_update_limit(memcg, nr_pages);
- mutex_unlock(&tcp_limit_mutex);
- break;
- default:
- ret = -EINVAL;
- break;
- }
- return ret ?: nbytes;
-}
-
-static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
- u64 val;
-
- switch (cft->private) {
- case RES_LIMIT:
- if (!cg_proto)
- return PAGE_COUNTER_MAX;
- val = cg_proto->memory_allocated.limit;
- val *= PAGE_SIZE;
- break;
- case RES_USAGE:
- if (!cg_proto)
- val = atomic_long_read(&tcp_memory_allocated);
- else
- val = page_counter_read(&cg_proto->memory_allocated);
- val *= PAGE_SIZE;
- break;
- case RES_FAILCNT:
- if (!cg_proto)
- return 0;
- val = cg_proto->memory_allocated.failcnt;
- break;
- case RES_MAX_USAGE:
- if (!cg_proto)
- return 0;
- val = cg_proto->memory_allocated.watermark;
- val *= PAGE_SIZE;
- break;
- default:
- BUG();
- }
- return val;
-}
-
-static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct mem_cgroup *memcg;
- struct cg_proto *cg_proto;
-
- memcg = mem_cgroup_from_css(of_css(of));
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return nbytes;
-
- switch (of_cft(of)->private) {
- case RES_MAX_USAGE:
- page_counter_reset_watermark(&cg_proto->memory_allocated);
- break;
- case RES_FAILCNT:
- cg_proto->memory_allocated.failcnt = 0;
- break;
- }
-
- return nbytes;
-}
-
-static struct cftype tcp_files[] = {
- {
- .name = "kmem.tcp.limit_in_bytes",
- .write = tcp_cgroup_write,
- .read_u64 = tcp_cgroup_read,
- .private = RES_LIMIT,
- },
- {
- .name = "kmem.tcp.usage_in_bytes",
- .read_u64 = tcp_cgroup_read,
- .private = RES_USAGE,
- },
- {
- .name = "kmem.tcp.failcnt",
- .private = RES_FAILCNT,
- .write = tcp_cgroup_reset,
- .read_u64 = tcp_cgroup_read,
- },
- {
- .name = "kmem.tcp.max_usage_in_bytes",
- .private = RES_MAX_USAGE,
- .write = tcp_cgroup_reset,
- .read_u64 = tcp_cgroup_read,
- },
- { } /* terminate */
-};
-
-static int __init tcp_memcontrol_init(void)
-{
- WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files));
- return 0;
-}
-__initcall(tcp_memcontrol_init);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c8cbc2b4b792..c26241f3057b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -369,6 +369,7 @@ void tcp_update_metrics(struct sock *sk)
const struct inet_connection_sock *icsk = inet_csk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct tcp_metrics_block *tm;
unsigned long rtt;
u32 val;
@@ -473,7 +474,7 @@ void tcp_update_metrics(struct sock *sk)
if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val < tp->reordering &&
- tp->reordering != sysctl_tcp_reordering)
+ tp->reordering != net->ipv4.sysctl_tcp_reordering)
tcp_metric_set(tm, TCP_METRIC_REORDERING,
tp->reordering);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75632a925824..fadd8b978951 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -27,9 +27,6 @@
#include <net/inet_common.h>
#include <net/xfrm.h>
-int sysctl_tcp_syncookies __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_tcp_syncookies);
-
int sysctl_tcp_abort_on_overflow __read_mostly;
struct inet_timewait_death_row tcp_death_row = {
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 9864a2dbadce..773083b7f1e9 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -135,7 +135,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
th->fin = th->psh = 0;
th->check = newcheck;
- if (skb->ip_summed != CHECKSUM_PARTIAL)
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ gso_reset_checksum(skb, ~th->check);
+ else
th->check = gso_make_checksum(skb, ~th->check);
seq += mss;
@@ -169,7 +171,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
skb->data_len);
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
- if (skb->ip_summed != CHECKSUM_PARTIAL)
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ gso_reset_checksum(skb, ~th->check);
+ else
th->check = gso_make_checksum(skb, ~th->check);
out:
return segs;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 412a920fe0ec..7d2c7a400456 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,9 +62,6 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
-unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
-EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
-
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
@@ -2813,13 +2810,16 @@ begin_fwd:
*/
void sk_forced_mem_schedule(struct sock *sk, int size)
{
- int amt, status;
+ int amt;
if (size <= sk->sk_forward_alloc)
return;
amt = sk_mem_pages(size);
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- sk_memory_allocated_add(sk, amt, &status);
+ sk_memory_allocated_add(sk, amt);
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ mem_cgroup_charge_skmem(sk->sk_memcg, amt);
}
/* Send a FIN. The caller locks the socket for us.
@@ -3473,6 +3473,7 @@ void tcp_send_probe0(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
unsigned long probe_max;
int err;
@@ -3486,7 +3487,7 @@ void tcp_send_probe0(struct sock *sk)
}
if (err <= 0) {
- if (icsk->icsk_backoff < sysctl_tcp_retries2)
+ if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
icsk->icsk_backoff++;
icsk->icsk_probes_out++;
probe_max = TCP_RTO_MAX;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a4730a28b220..49bc474f8e35 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,11 +22,6 @@
#include <linux/gfp.h>
#include <net/tcp.h>
-int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
-int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
-int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
-int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
-int sysctl_tcp_orphan_retries __read_mostly;
int sysctl_tcp_thin_linear_timeouts __read_mostly;
static void tcp_write_err(struct sock *sk)
@@ -82,7 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
/* Calculate maximal number or retries on an orphaned socket. */
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
- int retries = sysctl_tcp_orphan_retries; /* May be zero. */
+ int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
/* We know from an ICMP that something is wrong. */
if (sk->sk_err_soft && !alive)
@@ -157,6 +152,7 @@ static int tcp_write_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
int retry_until;
bool do_reset, syn_set = false;
@@ -169,10 +165,10 @@ static int tcp_write_timeout(struct sock *sk)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
- retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
+ retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
syn_set = true;
} else {
- if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
+ if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
/* Some middle-boxes may black-hole Fast Open _after_
* the handshake. Therefore we conservatively disable
* Fast Open on this path on recurring timeouts with
@@ -181,7 +177,7 @@ static int tcp_write_timeout(struct sock *sk)
if (tp->syn_data_acked &&
tp->bytes_acked <= tp->rx_opt.mss_clamp) {
tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
- if (icsk->icsk_retransmits == sysctl_tcp_retries1)
+ if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
@@ -191,7 +187,7 @@ static int tcp_write_timeout(struct sock *sk)
dst_negative_advice(sk);
}
- retry_until = sysctl_tcp_retries2;
+ retry_until = net->ipv4.sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
@@ -305,7 +301,7 @@ static void tcp_probe_timer(struct sock *sk)
(s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
goto abort;
- max_probes = sysctl_tcp_retries2;
+ max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -332,7 +328,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int max_retries = icsk->icsk_syn_retries ? :
- sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
+ sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct request_sock *req;
req = tcp_sk(sk)->fastopen_rsk;
@@ -360,6 +356,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
if (tp->fastopen_rsk) {
@@ -490,7 +487,7 @@ out_reset_timer:
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
- if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
+ if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0))
__sk_dst_reset(sk);
out:;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index dc45b538e237..836abe58a9c5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -356,8 +356,8 @@ EXPORT_SYMBOL(udp_lib_get_port);
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
- bool match_wildcard)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
+ bool match_wildcard)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -499,6 +499,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
struct sock *sk, *result;
struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 hash = 0;
begin:
@@ -512,14 +513,18 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- goto found;
+ if (select_ok) {
+ struct sock *sk2;
+
+ sk2 = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (sk2) {
+ result = sk2;
+ select_ok = false;
+ goto found;
+ }
}
matches = 1;
}
@@ -563,6 +568,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 hash = 0;
rcu_read_lock();
@@ -601,14 +607,18 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, skb,
+ if (select_ok) {
+ struct sock *sk2;
+
+ sk2 = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- goto found;
+ if (sk2) {
+ result = sk2;
+ select_ok = false;
+ goto found;
+ }
}
matches = 1;
}
@@ -838,32 +848,20 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
{
struct udphdr *uh = udp_hdr(skb);
- if (nocheck)
+ if (nocheck) {
uh->check = 0;
- else if (skb_is_gso(skb))
+ } else if (skb_is_gso(skb)) {
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- else if (skb_dst(skb) && skb_dst(skb)->dev &&
- (skb_dst(skb)->dev->features &
- (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ uh->check = 0;
+ uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- } else {
- __wsum csum;
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, len, 0);
- uh->check = udp_v4_check(len, saddr, daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
EXPORT_SYMBOL(udp_set_csum);
@@ -1038,8 +1036,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc,
sk->sk_family == AF_INET6);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
connected = 0;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 4c519c1dc161..f5abb1ae1358 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -32,42 +32,58 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features),
__be16 new_protocol, bool is_ipv6)
{
+ int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+ bool remcsum, need_csum, offload_csum, ufo;
struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct udphdr *uh = udp_hdr(skb);
u16 mac_offset = skb->mac_header;
- int mac_len = skb->mac_len;
- int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
__be16 protocol = skb->protocol;
- netdev_features_t enc_features;
+ u16 mac_len = skb->mac_len;
int udp_offset, outer_hlen;
- unsigned int oldlen;
- bool need_csum = !!(skb_shinfo(skb)->gso_type &
- SKB_GSO_UDP_TUNNEL_CSUM);
- bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
- bool offload_csum = false, dont_encap = (need_csum || remcsum);
-
- oldlen = (u16)~skb->len;
+ u32 partial;
if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
+ /* adjust partial header checksum to negate old length */
+ partial = (__force u32)uh->check + (__force u16)~uh->len;
+
+ /* setup inner skb. */
skb->encapsulation = 0;
__skb_pull(skb, tnl_hlen);
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = new_protocol;
+
+ need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
skb->encap_hdr_csum = need_csum;
+
+ remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
skb->remcsum_offload = remcsum;
+ ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
- ((skb->dev->features & NETIF_F_HW_CSUM) ||
- (skb->dev->features & (is_ipv6 ?
- NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM))));
+ (skb->dev->features &
+ (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
+ (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));
+
+ features &= skb->dev->hw_enc_features;
+
+ /* The only checksum offload we care about from here on out is the
+ * outer one so strip the existing checksum feature flags and
+ * instead set the flag based on our outer checksum offload value.
+ */
+ if (remcsum || ufo) {
+ features &= ~NETIF_F_CSUM_MASK;
+ if (!need_csum || offload_csum)
+ features |= NETIF_F_HW_CSUM;
+ }
/* segment inner packet. */
- enc_features = skb->dev->hw_enc_features & features;
- segs = gso_inner_segment(skb, enc_features);
+ segs = gso_inner_segment(skb, features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
mac_len);
@@ -78,17 +94,13 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
udp_offset = outer_hlen - tnl_hlen;
skb = segs;
do {
- struct udphdr *uh;
- int len;
- __be32 delta;
+ __be16 len;
- if (dont_encap) {
- skb->encapsulation = 0;
+ if (remcsum)
skb->ip_summed = CHECKSUM_NONE;
- } else {
- /* Only set up inner headers if we might be offloading
- * inner checksum.
- */
+
+ /* Set up inner headers if we are offloading inner checksum */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
}
@@ -96,43 +108,28 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
skb->mac_len = mac_len;
skb->protocol = protocol;
- skb_push(skb, outer_hlen);
+ __skb_push(skb, outer_hlen);
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
skb_set_transport_header(skb, udp_offset);
- len = skb->len - udp_offset;
+ len = htons(skb->len - udp_offset);
uh = udp_hdr(skb);
- uh->len = htons(len);
+ uh->len = len;
if (!need_csum)
continue;
- delta = htonl(oldlen + len);
-
uh->check = ~csum_fold((__force __wsum)
- ((__force u32)uh->check +
- (__force u32)delta));
- if (offload_csum) {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- } else if (remcsum) {
- /* Need to calculate checksum from scratch,
- * inner checksums are never when doing
- * remote_checksum_offload.
- */
-
- skb->csum = skb_checksum(skb, udp_offset,
- skb->len - udp_offset,
- 0);
- uh->check = csum_fold(skb->csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- uh->check = gso_make_checksum(skb, ~uh->check);
+ ((__force u32)len + partial));
+ if (skb->encapsulation || !offload_csum) {
+ uh->check = gso_make_checksum(skb, ~uh->check);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
+ } else {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
}
} while ((skb = skb->next));
out:
@@ -235,6 +232,13 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
skb->ip_summed = CHECKSUM_NONE;
+ /* If there is no outer header we can fake a checksum offload
+ * due to the fact that we have already done the checksum in
+ * software prior to segmenting the frame.
+ */
+ if (!skb->encap_hdr_csum)
+ features |= NETIF_F_HW_CSUM;
+
/* Fragment the skb. IP headers of the fragments are updated in
* inet_gso_segment()
*/
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index bb7dabe2ebbf..11e875ffd7ac 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -69,6 +69,7 @@ config INET6_ESP
select CRYPTO_CBC
select CRYPTO_SHA1
select CRYPTO_DES
+ select CRYPTO_ECHAINIV
---help---
Support for IPsec ESP.
@@ -206,6 +207,7 @@ config IPV6_NDISC_NODETYPE
config IPV6_TUNNEL
tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
select INET6_TUNNEL
+ select DST_CACHE
---help---
Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
RFC 2473.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 38eeddedfc21..a2d6f6c242af 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -216,6 +216,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
},
.use_oif_addrs_only = 0,
.ignore_routes_with_linkdown = 0,
+ .keep_addr_on_down = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
},
.use_oif_addrs_only = 0,
.ignore_routes_with_linkdown = 0,
+ .keep_addr_on_down = 0,
};
/* Check if a valid qdisc is available */
@@ -583,7 +585,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
@@ -3168,6 +3170,55 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
+static int fixup_permanent_addr(struct inet6_dev *idev,
+ struct inet6_ifaddr *ifp)
+{
+ if (!ifp->rt) {
+ struct rt6_info *rt;
+
+ rt = addrconf_dst_alloc(idev, &ifp->addr, false);
+ if (unlikely(IS_ERR(rt)))
+ return PTR_ERR(rt);
+
+ ifp->rt = rt;
+ }
+
+ if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ idev->dev, 0, 0);
+ }
+
+ addrconf_dad_start(ifp);
+
+ return 0;
+}
+
+static void addrconf_permanent_addr(struct net_device *dev)
+{
+ struct inet6_ifaddr *ifp, *tmp;
+ struct inet6_dev *idev;
+
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ return;
+
+ write_lock_bh(&idev->lock);
+
+ list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
+ if ((ifp->flags & IFA_F_PERMANENT) &&
+ fixup_permanent_addr(idev, ifp) < 0) {
+ write_unlock_bh(&idev->lock);
+ ipv6_del_addr(ifp);
+ write_lock_bh(&idev->lock);
+
+ net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n",
+ idev->dev->name, &ifp->addr);
+ }
+ }
+
+ write_unlock_bh(&idev->lock);
+}
+
static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -3253,6 +3304,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
+ /* restore routes for permanent addresses */
+ addrconf_permanent_addr(dev);
+
switch (dev->type) {
#if IS_ENABLED(CONFIG_IPV6_SIT)
case ARPHRD_SIT:
@@ -3356,7 +3410,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
{
struct net *net = dev_net(dev);
struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
+ struct inet6_ifaddr *ifa, *tmp;
+ struct list_head del_list;
+ int _keep_addr;
+ bool keep_addr;
int state, i;
ASSERT_RTNL();
@@ -3383,6 +3440,16 @@ static int addrconf_ifdown(struct net_device *dev, int how)
}
+ /* aggregate the system setting and interface setting */
+ _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
+ if (!_keep_addr)
+ _keep_addr = idev->cnf.keep_addr_on_down;
+
+ /* combine the user config with event to determine if permanent
+ * addresses are to be removed from address hash table
+ */
+ keep_addr = !(how || _keep_addr <= 0);
+
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
struct hlist_head *h = &inet6_addr_lst[i];
@@ -3391,9 +3458,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
restart:
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
- hlist_del_init_rcu(&ifa->addr_lst);
addrconf_del_dad_work(ifa);
- goto restart;
+ /* combined flag + permanent flag decide if
+ * address is retained on a down event
+ */
+ if (!keep_addr ||
+ !(ifa->flags & IFA_F_PERMANENT)) {
+ hlist_del_init_rcu(&ifa->addr_lst);
+ goto restart;
+ }
}
}
spin_unlock_bh(&addrconf_hash_lock);
@@ -3427,31 +3500,53 @@ restart:
write_lock_bh(&idev->lock);
}
- while (!list_empty(&idev->addr_list)) {
- ifa = list_first_entry(&idev->addr_list,
- struct inet6_ifaddr, if_list);
- addrconf_del_dad_work(ifa);
+ /* re-combine the user config with event to determine if permanent
+ * addresses are to be removed from the interface list
+ */
+ keep_addr = (!how && _keep_addr > 0);
- list_del(&ifa->if_list);
+ INIT_LIST_HEAD(&del_list);
+ list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+ addrconf_del_dad_work(ifa);
write_unlock_bh(&idev->lock);
-
spin_lock_bh(&ifa->lock);
- state = ifa->state;
- ifa->state = INET6_IFADDR_STATE_DEAD;
+
+ if (keep_addr && (ifa->flags & IFA_F_PERMANENT)) {
+ /* set state to skip the notifier below */
+ state = INET6_IFADDR_STATE_DEAD;
+ ifa->state = 0;
+ if (!(ifa->flags & IFA_F_NODAD))
+ ifa->flags |= IFA_F_TENTATIVE;
+ } else {
+ state = ifa->state;
+ ifa->state = INET6_IFADDR_STATE_DEAD;
+
+ list_del(&ifa->if_list);
+ list_add(&ifa->if_list, &del_list);
+ }
+
spin_unlock_bh(&ifa->lock);
if (state != INET6_IFADDR_STATE_DEAD) {
__ipv6_ifa_notify(RTM_DELADDR, ifa);
inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
}
- in6_ifa_put(ifa);
write_lock_bh(&idev->lock);
}
write_unlock_bh(&idev->lock);
+ /* now clean up addresses to be removed */
+ while (!list_empty(&del_list)) {
+ ifa = list_first_entry(&del_list,
+ struct inet6_ifaddr, if_list);
+ list_del(&ifa->if_list);
+
+ in6_ifa_put(ifa);
+ }
+
/* Step 5: Discard anycast and multicast list */
if (how) {
ipv6_ac_destroy_dev(idev);
@@ -3538,6 +3633,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
+ bool notify = false;
addrconf_join_solict(dev, &ifp->addr);
@@ -3583,7 +3679,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ notify = true;
}
}
@@ -3591,6 +3687,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
out:
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
+ if (notify)
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@ -4711,6 +4809,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
/* we omit DEVCONF_STABLE_SECRET for now */
array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
+ array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
+ array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
+ array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
}
static inline size_t inet6_ifla6_size(void)
@@ -5192,10 +5293,12 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (rt)
ip6_del_rt(rt);
}
- dst_hold(&ifp->rt->dst);
-
- ip6_del_rt(ifp->rt);
+ if (ifp->rt) {
+ dst_hold(&ifp->rt->dst);
+ ip6_del_rt(ifp->rt);
+ ifp->rt = NULL;
+ }
rt_genid_bump_ipv6(net);
break;
}
@@ -5785,6 +5888,28 @@ static struct addrconf_sysctl_table
.proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
},
{
+ .procname = "drop_unicast_in_l2_multicast",
+ .data = &ipv6_devconf.drop_unicast_in_l2_multicast,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "drop_unsolicited_na",
+ .data = &ipv6_devconf.drop_unsolicited_na,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "keep_addr_on_down",
+ .data = &ipv6_devconf.keep_addr_on_down,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+
+ },
+ {
/* sentinel */
}
},
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9f5137cd604e..b11c37cfd67c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -235,7 +235,11 @@ lookup_protocol:
* creation time automatically shares.
*/
inet->inet_sport = htons(inet->inet_num);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 517c55b01ba8..428162155280 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -162,6 +162,9 @@ ipv4_connected:
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
fl6.flowi6_oif = np->mcast_oif;
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 32dc9aab7297..30613050e4ca 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -99,5 +99,6 @@ static void __exit ila_fini(void)
module_init(ila_init);
module_exit(ila_fini);
+MODULE_ALIAS_RTNL_LWT(ILA);
MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 36c3f0155010..532c3ef282c5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -26,6 +26,7 @@
#include <net/ip6_route.h>
#include <net/sock.h>
#include <net/inet6_connection_sock.h>
+#include <net/sock_reuseport.h>
int inet6_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax)
@@ -48,6 +49,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid,
sock_i_uid((struct sock *)sk2))))) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21ace5a2bf7c..70f2628be6fa 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -17,11 +17,13 @@
#include <linux/module.h>
#include <linux/random.h>
+#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/sock_reuseport.h>
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
@@ -121,7 +123,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
}
struct sock *inet6_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
@@ -129,6 +133,7 @@ struct sock *inet6_lookup_listener(struct net *net,
const struct hlist_nulls_node *node;
struct sock *result;
int score, hiscore, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 phash = 0;
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -146,6 +151,15 @@ begin:
if (reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+ sk2 = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (sk2) {
+ result = sk2;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == hiscore && reuseport) {
@@ -163,11 +177,13 @@ begin:
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, daddr,
dif) < hiscore)) {
sock_put(result);
+ select_ok = false;
goto begin;
}
}
@@ -177,6 +193,7 @@ begin:
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
const int dif)
@@ -184,7 +201,8 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
struct sock *sk;
local_bh_disable();
- sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+ sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ ntohs(dport), dif);
local_bh_enable();
return sk;
@@ -274,3 +292,59 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
__inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
+
+int inet6_hash(struct sock *sk)
+{
+ if (sk->sk_state != TCP_CLOSE) {
+ local_bh_disable();
+ __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+ local_bh_enable();
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(inet6_hash);
+
+/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
+ * only, and any IPv4 addresses if not IPv6 only
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
+ */
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+ int sk2_ipv6only = inet_v6_ipv6only(sk2);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+ /* if both are mapped, treat as IPv4 */
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
+ if (!sk2_ipv6only) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+ }
+
+ if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
+ return 1;
+
+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (sk2_rcv_saddr6 &&
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 9a4d7322fb22..8f920580976f 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -98,27 +98,16 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb,
uh->check = 0;
else if (skb_is_gso(skb))
uh->check = ~udp_v6_check(len, saddr, daddr, 0);
- else if (skb_dst(skb) && skb_dst(skb)->dev &&
- (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
+ else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ uh->check = 0;
+ uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v6_check(len, saddr, daddr, 0);
- } else {
- __wsum csum;
-
- BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, len, 0);
- uh->check = udp_v6_check(len, saddr, daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
EXPORT_SYMBOL(udp6_set_csum);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1f9ebe3cbb4a..dc2db4f7b182 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
}
spin_lock_bh(&ip6_sk_fl_lock);
for (sflp = &np->ipv6_fl_list;
- (sfl = rcu_dereference(*sflp)) != NULL;
+ (sfl = rcu_dereference_protected(*sflp,
+ lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
sflp = &sfl->next) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = rcu_dereference(sfl->next);
+ *sflp = sfl->next;
spin_unlock_bh(&ip6_sk_fl_lock);
fl_release(sfl->fl);
kfree_rcu(sfl, rcu);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f37f18b6b40c..f7c9560b75fa 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -360,7 +360,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t);
- ip6_tnl_dst_reset(t);
+ dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
@@ -633,7 +633,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
if (!fl6->flowi6_mark)
- dst = ip6_tnl_dst_get(tunnel);
+ dst = dst_cache_get(&tunnel->dst_cache);
if (!dst) {
dst = ip6_route_output(net, NULL, fl6);
@@ -702,7 +702,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
if (!fl6->flowi6_mark && ndst)
- ip6_tnl_dst_set(tunnel, ndst);
+ dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst);
proto = NEXTHDR_GRE;
@@ -1009,7 +1009,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
t->parms.o_key = p->o_key;
t->parms.i_flags = p->i_flags;
t->parms.o_flags = p->o_flags;
- ip6_tnl_dst_reset(t);
+ dst_cache_reset(&t->dst_cache);
ip6gre_tnl_link_config(t, set_mtu);
return 0;
}
@@ -1219,7 +1219,7 @@ static void ip6gre_dev_free(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- ip6_tnl_dst_destroy(t);
+ dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1257,7 +1257,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
- ret = ip6_tnl_dst_init(tunnel);
+ ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret) {
free_percpu(dev->tstats);
dev->tstats = NULL;
@@ -1512,6 +1512,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->destructor = ip6gre_dev_free;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}
static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9075acf081dd..c05c425c2389 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,7 +49,7 @@
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
+ if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
@@ -134,6 +134,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
goto err;
+ /* If enabled, drop unicast packets that were encapsulated in link-layer
+ * multicast or broadcast to protected against the so-called "hole-196"
+ * attack in 802.11 wireless.
+ */
+ if (!ipv6_addr_is_multicast(&hdr->daddr) &&
+ (skb->pkt_type == PACKET_BROADCAST ||
+ skb->pkt_type == PACKET_MULTICAST) &&
+ idev->cnf.drop_unicast_in_l2_multicast)
+ goto err;
+
/* RFC4291 2.7
* Nodes must not originate a packet to a multicast address whose scope
* field contains the reserved value 0; if such a packet is received, it
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 23de98f976d5..a163102f1803 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct rt6_info *rt;
#endif
int err;
+ int flags = 0;
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
@@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
dst_release(*dst);
*dst = NULL;
}
+
+ if (fl6->flowi6_oif)
+ flags |= RT6_LOOKUP_F_IFACE;
}
if (!*dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output_flags(net, sk, fl6, flags);
err = (*dst)->error;
if (err)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 137fca42aaa6..3f3aabd2f07b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
return &dev->stats;
}
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
- struct dst_entry *dst)
-{
- write_seqlock_bh(&idst->lock);
- dst_release(rcu_dereference_protected(
- idst->dst,
- lockdep_is_held(&idst->lock.lock)));
- if (dst) {
- dst_hold(dst);
- idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
- } else {
- idst->cookie = 0;
- }
- rcu_assign_pointer(idst->dst, dst);
- write_sequnlock_bh(&idst->lock);
-}
-
-struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
-{
- struct ip6_tnl_dst *idst;
- struct dst_entry *dst;
- unsigned int seq;
- u32 cookie;
-
- idst = raw_cpu_ptr(t->dst_cache);
-
- rcu_read_lock();
- do {
- seq = read_seqbegin(&idst->lock);
- dst = rcu_dereference(idst->dst);
- cookie = idst->cookie;
- } while (read_seqretry(&idst->lock, seq));
-
- if (dst && !atomic_inc_not_zero(&dst->__refcnt))
- dst = NULL;
- rcu_read_unlock();
-
- if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
- ip6_tnl_per_cpu_dst_set(idst, NULL);
- dst_release(dst);
- dst = NULL;
- }
- return dst;
-}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
-
-void ip6_tnl_dst_reset(struct ip6_tnl *t)
-{
- int i;
-
- for_each_possible_cpu(i)
- ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
-}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
-
-void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
-{
- ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
-
-}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
-
-void ip6_tnl_dst_destroy(struct ip6_tnl *t)
-{
- if (!t->dst_cache)
- return;
-
- ip6_tnl_dst_reset(t);
- free_percpu(t->dst_cache);
-}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
-
-int ip6_tnl_dst_init(struct ip6_tnl *t)
-{
- int i;
-
- t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
- if (!t->dst_cache)
- return -ENOMEM;
-
- for_each_possible_cpu(i)
- seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
-
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
* @remote: the address of the tunnel exit-point
@@ -329,7 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- ip6_tnl_dst_destroy(t);
+ dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -462,7 +371,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
ip6_tnl_unlink(ip6n, t);
- ip6_tnl_dst_reset(t);
+ dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
@@ -1069,7 +978,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
} else if (!fl6->flowi6_mark)
- dst = ip6_tnl_dst_get(t);
+ dst = dst_cache_get(&t->dst_cache);
if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
goto tx_err_link_failure;
@@ -1133,7 +1042,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
}
if (!fl6->flowi6_mark && ndst)
- ip6_tnl_dst_set(t, ndst);
+ dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst);
skb->transport_header = skb->network_header;
@@ -1366,7 +1275,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.flowinfo = p->flowinfo;
t->parms.link = p->link;
t->parms.proto = p->proto;
- ip6_tnl_dst_reset(t);
+ dst_cache_reset(&t->dst_cache);
ip6_tnl_link_config(t);
return 0;
}
@@ -1637,7 +1546,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
- ret = ip6_tnl_dst_init(t);
+ ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
if (ret) {
free_percpu(dev->tstats);
dev->tstats = NULL;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 0a8610b33d79..d90a11f14040 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -640,7 +640,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
t->parms.proto = p->proto;
- ip6_tnl_dst_reset(t);
+ dst_cache_reset(&t->dst_cache);
vti6_link_config(t);
return 0;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 84afb9a77278..c245895a3d41 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -883,6 +883,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
+ struct inet6_dev *idev = __in6_dev_get(dev);
struct inet6_ifaddr *ifp;
struct neighbour *neigh;
@@ -902,6 +903,14 @@ static void ndisc_recv_na(struct sk_buff *skb)
return;
}
+ /* For some 802.11 wireless deployments (and possibly other networks),
+ * there will be a NA proxy and unsolicitd packets are attacks
+ * and thus should not be accepted.
+ */
+ if (!msg->icmph.icmp6_solicited && idev &&
+ idev->cnf.drop_unsolicited_na)
+ return;
+
if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND option\n");
return;
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 31ba7ca19757..051b6a6bfff6 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -21,6 +21,10 @@
#include <net/ipv6.h>
#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+#define MAX_WORK_COUNT 16
+
+static atomic_t v6_worker_count;
+
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
const struct net_device *out)
@@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = {
.notifier_call = masq_device_event,
};
+struct masq_dev_work {
+ struct work_struct work;
+ struct net *net;
+ int ifindex;
+};
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+ struct masq_dev_work *w;
+ long index;
+
+ w = container_of(work, struct masq_dev_work, work);
+
+ index = w->ifindex;
+ nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+
+ put_net(w->net);
+ kfree(w);
+ atomic_dec(&v6_worker_count);
+ module_put(THIS_MODULE);
+}
+
+/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
+ * schedule.
+ *
+ * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * time if there are lots of conntracks and the system
+ * handles high softirq load, so it frequently calls cond_resched
+ * while iterating the conntrack table.
+ *
+ * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on amount
+ * of ipv6 addresses being deleted), we also need to add an upper
+ * limit to the number of queued work items.
+ */
static int masq_inet_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
- struct netdev_notifier_info info;
+ const struct net_device *dev;
+ struct masq_dev_work *w;
+ struct net *net;
+
+ if (event != NETDEV_DOWN ||
+ atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
+ return NOTIFY_DONE;
+
+ dev = ifa->idev->dev;
+ net = maybe_get_net(dev_net(dev));
+ if (!net)
+ return NOTIFY_DONE;
- netdev_notifier_info_init(&info, ifa->idev->dev);
- return masq_device_event(this, event, &info);
+ if (!try_module_get(THIS_MODULE))
+ goto err_module;
+
+ w = kmalloc(sizeof(*w), GFP_ATOMIC);
+ if (w) {
+ atomic_inc(&v6_worker_count);
+
+ INIT_WORK(&w->work, iterate_cleanup_work);
+ w->ifindex = dev->ifindex;
+ w->net = net;
+ schedule_work(&w->work);
+
+ return NOTIFY_DONE;
+ }
+
+ module_put(THIS_MODULE);
+ err_module:
+ put_net(net);
+ return NOTIFY_DONE;
}
static struct notifier_block masq_inet_notifier = {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 18f3498a6c80..e2ea31175ef9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
/* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(skb_network_header(head),
- skb_network_header_len(head),
- head->csum);
+ skb_postpush_rcsum(head, skb_network_header(head),
+ skb_network_header_len(head));
rcu_read_lock();
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3c8834bc822d..ed446639219c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1183,11 +1183,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
- struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6, int flags)
{
struct dst_entry *dst;
- int flags = 0;
bool any_src;
dst = l3mdev_rt6_dst_by_oif(net, fl6);
@@ -1208,7 +1207,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e794ef66a401..f45b8ffc2840 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -201,14 +201,14 @@ static int ipip6_tunnel_create(struct net_device *dev)
if ((__force u16)t->parms.i_flags & SIT_ISATAP)
dev->priv_flags |= IFF_ISATAP;
+ dev->rtnl_link_ops = &sit_link_ops;
+
err = register_netdevice(dev);
if (err < 0)
goto out;
ipip6_tunnel_clone_6rd(dev, sitn);
- dev->rtnl_link_ops = &sit_link_ops;
-
dev_hold(dev);
ipip6_tunnel_link(sitn, t);
@@ -475,7 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_unlink(sitn, tunnel);
ipip6_tunnel_del_prl(tunnel, NULL);
}
- ip_tunnel_dst_reset_all(tunnel);
+ dst_cache_reset(&tunnel->dst_cache);
dev_put(dev);
}
@@ -740,7 +740,7 @@ static int ipip_rcv(struct sk_buff *skb)
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto))
+ if (iptunnel_pull_header(skb, 0, tpi.proto, false))
goto drop;
return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
@@ -911,7 +911,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT);
if (IS_ERR(skb)) {
ip_rt_put(rt);
goto out;
@@ -1000,7 +1000,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
if (IS_ERR(skb))
goto out;
@@ -1093,7 +1093,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
}
- ip_tunnel_dst_reset_all(t);
+ dst_cache_reset(&t->dst_cache);
netdev_state_change(t->dev);
}
@@ -1124,7 +1124,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd->prefixlen;
t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
- ip_tunnel_dst_reset_all(t);
+ dst_cache_reset(&t->dst_cache);
netdev_state_change(t->dev);
return 0;
}
@@ -1278,7 +1278,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break;
}
- ip_tunnel_dst_reset_all(t);
+ dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
break;
@@ -1339,7 +1339,7 @@ static void ipip6_dev_free(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- free_percpu(tunnel->dst_cache);
+ dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1372,6 +1372,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
static int ipip6_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ int err;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1382,10 +1383,10 @@ static int ipip6_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
- tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
- if (!tunnel->dst_cache) {
+ err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+ if (err) {
free_percpu(dev->tstats);
- return -ENOMEM;
+ return err;
}
return 0;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2906ef20795e..0e393ff7f5d0 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -148,7 +148,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst;
__u8 rcv_wscale;
- if (!sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index db9f1c318afc..33f2820181f9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -61,7 +61,6 @@
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
-#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
#include <linux/proc_fs.h>
@@ -328,6 +327,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
+ bool fatal;
int err;
sk = __inet6_lookup_established(net, &tcp_hashinfo,
@@ -346,8 +346,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
}
seq = ntohl(th->seq);
+ fatal = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq, fatal);
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -401,7 +402,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- icmpv6_err_convert(type, code, &err);
/* Might be for an request_sock */
switch (sk->sk_state) {
@@ -867,7 +867,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* no RST generated if md5 hash doesn't match.
*/
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, &ipv6h->saddr,
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
th->source, &ipv6h->daddr,
ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
@@ -1376,8 +1377,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
lookup:
- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
- inet6_iif(skb));
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+ th->source, th->dest, inet6_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1387,7 +1388,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
tcp_v6_fill_cb(skb, hdr, th);
@@ -1395,24 +1396,24 @@ process:
reqsk_put(req);
goto discard_it;
}
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
tcp_v6_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v6_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@ -1501,6 +1502,7 @@ do_time_wait:
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest), tcp_v6_iif(skb));
@@ -1866,7 +1868,7 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
@@ -1889,9 +1891,6 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_MEMCG_KMEM
- .proto_cgroup = tcp_proto_cgroup,
-#endif
.clear_sk = tcp_v6_clear_sk,
.diag_destroy = tcp_abort,
};
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5d2c2afffe7b..0711f8fe4d44 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <asm/uaccess.h>
+#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
@@ -77,49 +78,6 @@ static u32 udp6_ehashfn(const struct net *net,
udp_ipv6_hash_secret + net_hash_mix(net));
}
-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
- * only, and any IPv4 addresses if not IPv6 only
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
- * and 0.0.0.0 equals to 0.0.0.0 only
- */
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
-{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
-
- /* if both are mapped, treat as IPv4 */
- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
- if (!sk2_ipv6only) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
- return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
- return match_wildcard;
- }
- return 0;
- }
-
- if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
-
- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
-
- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
-
- if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
- return 1;
-
- return 0;
-}
-
static u32 udp6_portaddr_hash(const struct net *net,
const struct in6_addr *addr6,
unsigned int port)
@@ -257,6 +215,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
struct sock *sk, *result;
struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 hash = 0;
begin:
@@ -270,14 +229,18 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- goto found;
+ if (select_ok) {
+ struct sock *sk2;
+
+ sk2 = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (sk2) {
+ result = sk2;
+ select_ok = false;
+ goto found;
+ }
}
matches = 1;
}
@@ -321,6 +284,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 hash = 0;
rcu_read_lock();
@@ -358,14 +322,18 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, skb,
+ if (select_ok) {
+ struct sock *sk2;
+
+ sk2 = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- goto found;
+ if (sk2) {
+ result = sk2;
+ select_ok = false;
+ goto found;
+ }
}
matches = 1;
}
@@ -580,6 +548,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
const struct in6_addr *daddr = &hdr->daddr;
struct udphdr *uh = (struct udphdr *)(skb->data+offset);
struct sock *sk;
+ int harderr;
int err;
struct net *net = dev_net(skb->dev);
@@ -591,26 +560,27 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
}
+ harderr = icmpv6_err_convert(type, code, &err);
+ np = inet6_sk(sk);
+
if (type == ICMPV6_PKT_TOOBIG) {
if (!ip6_sk_accept_pmtu(sk))
goto out;
ip6_sk_update_pmtu(skb, sk, info);
+ if (np->pmtudisc != IPV6_PMTUDISC_DONT)
+ harderr = 1;
}
if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
goto out;
}
- np = inet6_sk(sk);
-
- if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
- goto out;
-
- if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
- goto out;
-
- if (np->recverr)
+ if (!np->recverr) {
+ if (!harderr || sk->sk_state != TCP_ESTABLISHED)
+ goto out;
+ } else {
ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
+ }
sk->sk_err = err;
sk->sk_error_report(sk);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 7441e1e63893..2b0fbe6929e8 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -81,12 +81,18 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
csum = skb_checksum(skb, 0, skb->len, 0);
uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
&ipv6h->daddr, csum);
-
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
skb->ip_summed = CHECKSUM_NONE;
+ /* If there is no outer header we can fake a checksum offload
+ * due to the fact that we have already done the checksum in
+ * software prior to segmenting the frame.
+ */
+ if (!skb->encap_hdr_csum)
+ features |= NETIF_F_HW_CSUM;
+
/* Check if there is enough headroom to insert fragment header. */
tnl_hlen = skb_tnl_header_len(skb);
if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index f7fbdbabe50e..372855eeaf42 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -23,7 +23,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
- IP6_ECN_set_ce(inner_iph);
+ IP6_ECN_set_ce(skb, inner_iph);
}
/* Add encapsulation header.
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 3c4caa60c926..5728e76ca6d5 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -134,11 +134,10 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
return -1;
}
skb_put(skb, count);
+ pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
spin_unlock_irqrestore(&self->spinlock, flags);
- pr_debug("%s(), skb->len=%d\n", __func__ , skb->len);
-
if (flush) {
/* ircomm_tty_do_softint will take care of the rest */
schedule_work(&self->tqueue);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ef50a94d3eb7..fc3598a922b0 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
if (!addr || addr->sa_family != AF_IUCV)
return -EINVAL;
+ if (addr_len < sizeof(struct sockaddr_iucv))
+ return -EINVAL;
+
lock_sock(sk);
if (sk->sk_state != IUCV_OPEN) {
err = -EBADFD;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index a2c8747d2936..6b54ff3ff4cb 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -25,6 +25,7 @@
#include <net/udp.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
@@ -718,7 +719,7 @@ static struct proto l2tp_ip6_prot = {
.sendmsg = l2tp_ip6_sendmsg,
.recvmsg = l2tp_ip6_recvmsg,
.backlog_rcv = l2tp_ip6_backlog_recv,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
#ifdef CONFIG_COMPAT
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f93c5be612a7..2caaa84ce92d 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family,
ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, tunnel, cmd);
- if (ret >= 0)
- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ /* We don't care if no one is listening */
+ if (ret == -ESRCH)
+ ret = 0;
+ return ret;
+ }
nlmsg_free(msg);
@@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family,
ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, session, cmd);
- if (ret >= 0)
- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ /* We don't care if no one is listening */
+ if (ret == -ESRCH)
+ ret = 0;
+ return ret;
+ }
nlmsg_free(msg);
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8e5ead366e7f..e925037fa0df 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -17,7 +17,7 @@
* @dev: targeted interface
*/
-int l3mdev_master_ifindex_rcu(struct net_device *dev)
+int l3mdev_master_ifindex_rcu(const struct net_device *dev)
{
int ifindex = 0;
@@ -28,8 +28,15 @@ int l3mdev_master_ifindex_rcu(struct net_device *dev)
ifindex = dev->ifindex;
} else if (netif_is_l3_slave(dev)) {
struct net_device *master;
+ struct net_device *_dev = (struct net_device *)dev;
- master = netdev_master_upper_dev_get_rcu(dev);
+ /* netdev_master_upper_dev_get_rcu calls
+ * list_first_or_null_rcu to walk the upper dev list.
+ * list_first_or_null_rcu does not handle a const arg. We aren't
+ * making changes, just want the master device from that list so
+ * typecast to remove the const
+ */
+ master = netdev_master_upper_dev_get_rcu(_dev);
if (master)
ifindex = master->ifindex;
}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8dab4e569571..b3c52e3f689a 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -38,7 +38,7 @@ static u16 llc_ui_sap_link_no_max[256];
static struct sockaddr_llc llc_ui_addrnull;
static const struct proto_ops llc_ui_ops;
-static int llc_ui_wait_for_conn(struct sock *sk, long timeout);
+static long llc_ui_wait_for_conn(struct sock *sk, long timeout);
static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout);
@@ -551,7 +551,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
return rc;
}
-static int llc_ui_wait_for_conn(struct sock *sk, long timeout)
+static long llc_ui_wait_for_conn(struct sock *sk, long timeout)
{
DEFINE_WAIT(wait);
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index e433d0c97e86..4ab5c522ceee 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -91,7 +91,7 @@ static const struct file_operations reset_ops = {
};
#endif
-static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
+static const char *hw_flag_names[] = {
#define FLAG(F) [IEEE80211_HW_##F] = #F
FLAG(HAS_RATE_CONTROL),
FLAG(RX_INCLUDES_FCS),
@@ -127,9 +127,6 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
FLAG(BEACON_TX_STATUS),
FLAG(NEEDS_UNIQUE_STA_ADDR),
FLAG(SUPPORTS_REORDERING_BUFFER),
-
- /* keep last for the build bug below */
- (void *)0x1
#undef FLAG
};
@@ -149,7 +146,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf,
/* fail compilation if somebody adds or removes
* a flag without updating the name array above
*/
- BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1);
+ BUILD_BUG_ON(ARRAY_SIZE(hw_flag_names) != NUM_IEEE80211_HW_FLAGS);
for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) {
if (test_bit(i, local->hw.flags))
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 9b983788ee51..fc3238376b39 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1739,7 +1739,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
continue;
sdata->u.ibss.last_scan_completed = jiffies;
- ieee80211_queue_work(&local->hw, &sdata->work);
}
mutex_unlock(&local->iflist_mtx);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6bcf0faa4a89..8190bf27ebff 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -248,6 +248,7 @@ static void ieee80211_restart_work(struct work_struct *work)
/* wait for scan work complete */
flush_workqueue(local->workqueue);
+ flush_work(&local->sched_scan_stopped_work);
WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
"%s called with hardware scan in progress\n", __func__);
@@ -256,6 +257,11 @@ static void ieee80211_restart_work(struct work_struct *work)
list_for_each_entry(sdata, &local->interfaces, list)
flush_delayed_work(&sdata->dec_tailroom_needed_wk);
ieee80211_scan_cancel(local);
+
+ /* make sure any new ROC will consider local->in_reconfig */
+ flush_delayed_work(&local->roc_work);
+ flush_work(&local->hw_roc_done);
+
ieee80211_reconfig(local);
rtnl_unlock();
}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 9a8e7b57c86e..d32cefcb63b0 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1369,17 +1369,6 @@ out:
sdata_unlock(sdata);
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
- struct ieee80211_sub_if_data *sdata;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list)
- if (ieee80211_vif_is_mesh(&sdata->vif) &&
- ieee80211_sdata_running(sdata))
- ieee80211_queue_work(&local->hw, &sdata->work);
- rcu_read_unlock();
-}
void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
{
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index d941f3a73a4f..87c017a3b1ce 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -359,14 +359,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
void ieee80211s_stop(void);
#else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
{ return false; }
static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f41625bcd879..281b8d6e5109 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3980,8 +3980,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.monitor_work);
- /* and do all the other regular work too */
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
}
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 8b2f4eaac2ba..55a9c5b94ce1 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -252,14 +252,11 @@ static bool ieee80211_recalc_sw_work(struct ieee80211_local *local,
static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
unsigned long start_time)
{
- struct ieee80211_local *local = roc->sdata->local;
-
if (WARN_ON(roc->notified))
return;
roc->start_time = start_time;
roc->started = true;
- roc->hw_begun = true;
if (roc->mgmt_tx_cookie) {
if (!WARN_ON(!roc->frame)) {
@@ -274,9 +271,6 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
}
roc->notified = true;
-
- if (!local->ops->remain_on_channel)
- ieee80211_recalc_sw_work(local, start_time);
}
static void ieee80211_hw_roc_start(struct work_struct *work)
@@ -291,6 +285,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
if (!roc->started)
break;
+ roc->hw_begun = true;
ieee80211_handle_roc_started(roc, local->hw_roc_start_time);
}
@@ -413,6 +408,10 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
return;
}
+ /* defer roc if driver is not started (i.e. during reconfig) */
+ if (local->in_reconfig)
+ return;
+
roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
list);
@@ -534,8 +533,10 @@ ieee80211_coalesce_hw_started_roc(struct ieee80211_local *local,
* begin, otherwise they'll both be marked properly by the work
* struct that runs once the driver notifies us of the beginning
*/
- if (cur_roc->hw_begun)
+ if (cur_roc->hw_begun) {
+ new_roc->hw_begun = true;
ieee80211_handle_roc_started(new_roc, now);
+ }
return true;
}
@@ -658,6 +659,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
queued = true;
roc->on_channel = tmp->on_channel;
ieee80211_handle_roc_started(roc, now);
+ ieee80211_recalc_sw_work(local, now);
break;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index a413e52f7691..ae980ce8daff 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
bool was_scanning = local->scanning;
struct cfg80211_scan_request *scan_req;
struct ieee80211_sub_if_data *scan_sdata;
+ struct ieee80211_sub_if_data *sdata;
lockdep_assert_held(&local->mtx);
@@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
ieee80211_mlme_notify_scan_completed(local);
ieee80211_ibss_notify_scan_completed(local);
- ieee80211_mesh_notify_scan_completed(local);
+
+ /* Requeue all the work that might have been ignored while
+ * the scan was in progress; if there was none this will
+ * just be a no-op for the particular interface.
+ */
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ }
+
if (was_scanning)
ieee80211_start_next_roc(local);
}
@@ -1213,6 +1223,14 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
trace_api_sched_scan_stopped(local);
+ /*
+ * this shouldn't really happen, so for simplicity
+ * simply ignore it, and let mac80211 reconfigure
+ * the sched scan later on.
+ */
+ if (local->in_reconfig)
+ return;
+
schedule_work(&local->sched_scan_stopped_work);
}
EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 87b7e7a7df6c..d20bab5c146c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1461,7 +1461,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids);
- if (reason == IEEE80211_FRAME_RELEASE_PSPOLL)
+ if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL)
driver_release_tids =
BIT(find_highest_prio_tid(driver_release_tids));
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 5bad05e9af90..6101deb805a8 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -51,6 +51,11 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
struct ieee80211_hdr *hdr = (void *)skb->data;
int ac;
+ if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
+ ieee80211_free_txskb(&local->hw, skb);
+ return;
+ }
+
/*
* This skb 'survived' a round-trip through the driver, and
* hopefully the driver didn't mangle it too badly. However,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 89f71799df84..7390de4946a9 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2046,16 +2046,26 @@ int ieee80211_reconfig(struct ieee80211_local *local)
*/
if (sched_scan_req->n_scan_plans > 1 ||
__ieee80211_request_sched_scan_start(sched_scan_sdata,
- sched_scan_req))
+ sched_scan_req)) {
+ RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
+ RCU_INIT_POINTER(local->sched_scan_req, NULL);
sched_scan_stopped = true;
+ }
mutex_unlock(&local->mtx);
if (sched_scan_stopped)
cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
wake_up:
- local->in_reconfig = false;
- barrier();
+ if (local->in_reconfig) {
+ local->in_reconfig = false;
+ barrier();
+
+ /* Restart deferred ROCs */
+ mutex_lock(&local->mtx);
+ ieee80211_start_next_roc(local);
+ mutex_unlock(&local->mtx);
+ }
if (local->monitors == local->open_count && local->monitors > 0)
ieee80211_add_virtual_monitor(local);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index fb31aa87de81..644a8da6d4bd 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -227,5 +227,6 @@ static void __exit mpls_iptunnel_exit(void)
}
module_exit(mpls_iptunnel_exit);
+MODULE_ALIAS_RTNL_LWT(MPLS);
MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels");
MODULE_LICENSE("GPL v2");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8c067e6663a1..95e757c377f9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -891,7 +891,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
- select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
+ select NF_DUP_IPV6 if IPV6
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 43d8c9896fa3..f0f688db6213 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -164,8 +164,6 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- if (e.cidr == 0)
- return -EINVAL;
if (adt == IPSET_TEST)
e.cidr = HOST_MASK;
@@ -377,8 +375,6 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- if (e.cidr == 0)
- return -EINVAL;
if (adt == IPSET_TEST)
e.cidr = HOST_MASK;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3264cb49b333..a3f5cd9b3c4c 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -1019,8 +1019,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (IS_ERR(skb))
goto tx_error;
- skb = iptunnel_handle_offloads(
- skb, false, __tun_gso_type_mask(AF_INET, cp->af));
+ skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af));
if (IS_ERR(skb))
goto tx_error;
@@ -1112,8 +1111,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (IS_ERR(skb))
goto tx_error;
- skb = iptunnel_handle_offloads(
- skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
+ skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af));
if (IS_ERR(skb))
goto tx_error;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cb3cb831591..f60b4fdeeb8c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly bool nf_conntrack_locks_all;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+ spin_lock(lock);
+ while (unlikely(nf_conntrack_locks_all)) {
+ spin_unlock(lock);
+ spin_lock(&nf_conntrack_locks_all_lock);
+ spin_unlock(&nf_conntrack_locks_all_lock);
+ spin_lock(lock);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
{
h1 %= CONNTRACK_LOCKS;
@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
h1 %= CONNTRACK_LOCKS;
h2 %= CONNTRACK_LOCKS;
if (h1 <= h2) {
- spin_lock(&nf_conntrack_locks[h1]);
+ nf_conntrack_lock(&nf_conntrack_locks[h1]);
if (h1 != h2)
spin_lock_nested(&nf_conntrack_locks[h2],
SINGLE_DEPTH_NESTING);
} else {
- spin_lock(&nf_conntrack_locks[h2]);
+ nf_conntrack_lock(&nf_conntrack_locks[h2]);
spin_lock_nested(&nf_conntrack_locks[h1],
SINGLE_DEPTH_NESTING);
}
@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void)
{
int i;
- for (i = 0; i < CONNTRACK_LOCKS; i++)
- spin_lock_nested(&nf_conntrack_locks[i], i);
+ spin_lock(&nf_conntrack_locks_all_lock);
+ nf_conntrack_locks_all = true;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++) {
+ spin_lock(&nf_conntrack_locks[i]);
+ spin_unlock(&nf_conntrack_locks[i]);
+ }
}
static void nf_conntrack_all_unlock(void)
{
- int i;
-
- for (i = 0; i < CONNTRACK_LOCKS; i++)
- spin_unlock(&nf_conntrack_locks[i]);
+ nf_conntrack_locks_all = false;
+ spin_unlock(&nf_conntrack_locks_all_lock);
}
unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +775,7 @@ restart:
hash = hash_bucket(_hash, net);
for (; i < net->ct.htable_size; i++) {
lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (read_seqcount_retry(&net->ct.generation, sequence)) {
spin_unlock(lockp);
goto restart;
@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
for (; *bucket < net->ct.htable_size; (*bucket)++) {
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (*bucket < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -1394,6 +1412,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
}
spin_unlock(lockp);
local_bh_enable();
+ cond_resched();
}
for_each_possible_cpu(cpu) {
@@ -1406,6 +1425,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
set_bit(IPS_DYING_BIT, &ct->status);
}
spin_unlock_bh(&pcpu->lock);
+ cond_resched();
}
return NULL;
found:
@@ -1422,6 +1442,8 @@ void nf_ct_iterate_cleanup(struct net *net,
struct nf_conn *ct;
unsigned int bucket = 0;
+ might_sleep();
+
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
@@ -1430,6 +1452,7 @@ void nf_ct_iterate_cleanup(struct net *net,
/* ... else the timer will get him soon. */
nf_ct_put(ct);
+ cond_resched();
}
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index bd9d31537905..3b40ec575cd5 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
local_bh_disable();
for (i = 0; i < net->ct.htable_size; i++) {
- spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
unhelp(h, me);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dbb1bb3edb45..355e8552fd5b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart:
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (cb->args[0] >= net->ct.htable_size) {
spin_unlock(lockp);
goto out;
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index b6605e000801..5eefe4a355c6 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -224,12 +224,12 @@ static int __init nf_tables_netdev_init(void)
nft_register_chain_type(&nft_filter_chain_netdev);
ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
- if (ret < 0)
+ if (ret < 0) {
nft_unregister_chain_type(&nft_filter_chain_netdev);
-
+ return ret;
+ }
register_netdevice_notifier(&nf_tables_netdev_notifier);
-
- return ret;
+ return 0;
}
static void __exit nf_tables_netdev_exit(void)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a7ba23353dab..2278d9ab723b 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
- u32 dst_portid, gfp_t gfp_mask)
-{
- return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
-}
-EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
-
int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags)
{
@@ -311,14 +304,14 @@ replay:
#endif
{
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
}
if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
@@ -328,10 +321,12 @@ replay:
nlh = nlmsg_hdr(skb);
err = 0;
- if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
- skb->len < nlh->nlmsg_len) {
- err = -EINVAL;
- goto ack;
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < nlh->nlmsg_len ||
+ nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+ nfnl_err_reset(&err_list);
+ status |= NFNL_BATCH_FAILURE;
+ goto done;
}
/* Only requests are handled by the kernel */
@@ -406,7 +401,7 @@ ack:
* pointing to the batch header.
*/
nfnl_err_reset(&err_list);
- netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
status |= NFNL_BATCH_FAILURE;
goto done;
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 5d010f27ac01..2671b9deb103 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -307,7 +307,7 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
local_bh_disable();
for (i = 0; i < net->ct.htable_size; i++) {
- spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
untimeout(h, timeout);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 8ca932057c13..11f81c8385fc 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
- skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC);
+ skb = alloc_skb(n, GFP_ATOMIC);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
* packet */
- skb = nfnetlink_alloc_skb(net, pkt_size,
- peer_portid, GFP_ATOMIC);
+ skb = alloc_skb(pkt_size, GFP_ATOMIC);
}
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 1d3936587ace..75429997ed41 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
__be32 **packet_id_ptr)
{
size_t size;
- size_t data_len = 0, cap_len = 0, rem_len = 0;
+ size_t data_len = 0, cap_len = 0;
unsigned int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
@@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
hlen = min_t(unsigned int, hlen, data_len);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
- rem_len = data_len - hlen;
break;
}
@@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
size += nla_total_size(seclen);
}
- skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
- GFP_ATOMIC);
+ skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
return NULL;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 383c17138399..b78c28ba465f 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -46,16 +46,14 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 8; i++) {
- src64 = get_unaligned_be64(&src[i]);
- src64 = be64_to_cpu((__force __be64)src64);
+ src64 = get_unaligned((u64 *)&src[i]);
put_unaligned_be64(src64, &dst[i]);
}
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 8; i++) {
src64 = get_unaligned_be64(&src[i]);
- src64 = (__force u64)cpu_to_be64(src64);
- put_unaligned_be64(src64, &dst[i]);
+ put_unaligned(src64, (u64 *)&dst[i]);
}
break;
}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index c7808fc19719..c9743f78f219 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -100,7 +100,7 @@ static int nft_counter_init(const struct nft_ctx *ctx,
cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
if (cpu_stats == NULL)
- return ENOMEM;
+ return -ENOMEM;
preempt_disable();
this_cpu = this_cpu_ptr(cpu_stats);
@@ -138,7 +138,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
GFP_ATOMIC);
if (cpu_stats == NULL)
- return ENOMEM;
+ return -ENOMEM;
preempt_disable();
this_cpu = this_cpu_ptr(cpu_stats);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index a0eb2161e3ef..d4a4619fcebc 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -127,6 +127,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
NF_CT_LABELS_MAX_SIZE - size);
return;
}
+#endif
case NFT_CT_BYTES: /* fallthrough */
case NFT_CT_PKTS: {
const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
@@ -138,7 +139,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
memcpy(dest, &count, sizeof(count));
return;
}
-#endif
default:
break;
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index b7c43def0dc6..e118397254af 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -228,7 +228,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
u8 nexthdr;
- __be16 frag_off;
+ __be16 frag_off, oldlen, newlen;
int tcphoff;
int ret;
@@ -244,7 +244,12 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
return NF_DROP;
if (ret > 0) {
ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+ oldlen = ipv6h->payload_len;
+ newlen = htons(ntohs(oldlen) + ret);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(csum_sub(skb->csum, oldlen),
+ newlen);
+ ipv6h->payload_len = newlen;
}
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 3eff7b67cdf2..6e57a3966dc5 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -131,7 +131,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
{
.name = "TEE",
.revision = 1,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 3ab591e73ec0..7f4414d26a66 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
* belonging to established connections going through that one.
*/
static inline struct sock *
-nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
+ ip_hdrlen(skb) +
+ __tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex);
@@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
#ifdef XT_TPROXY_HAVE_IPV6
static inline struct sock *
-nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet6_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
+ thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex);
@@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
@@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
- &iph->saddr, laddr,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
+ tproto, &iph->saddr, laddr,
hp->source, lport,
par->in, NFT_LOOKUP_LISTENER);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2ec08f04b816..49d14ecad444 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb,
* box.
*/
static struct sock *
-xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return __inet_lookup(net, &tcp_hashinfo,
+ return __inet_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -148,6 +149,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
u8 uninitialized_var(protocol);
@@ -169,6 +172,10 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
sport = hp->source;
daddr = iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = iph->protocol == IPPROTO_TCP ?
+ ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
+ ip_hdrlen(skb) + sizeof(*hp);
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
@@ -198,8 +205,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
}
#endif
- return xt_socket_get_sock_v4(net, protocol, saddr, daddr,
- sport, dport, indev);
+ return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
+ daddr, sport, dport, indev);
}
static bool
@@ -318,14 +325,15 @@ extract_icmp6_fields(const struct sk_buff *skb,
}
static struct sock *
-xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo,
+ return inet6_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -343,6 +351,8 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
__be16 uninitialized_var(dport), uninitialized_var(sport);
const struct in6_addr *daddr = NULL, *saddr = NULL;
struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
@@ -362,6 +372,10 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
sport = hp->source;
daddr = &iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = tproto == IPPROTO_TCP ?
+ thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+ thoff + sizeof(*hp);
} else if (tproto == IPPROTO_ICMPV6) {
struct ipv6hdr ipv6_var;
@@ -373,7 +387,7 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
return NULL;
}
- return xt_socket_get_sock_v6(net, tproto, saddr, daddr,
+ return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
sport, dport, indev);
}
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
index 2c5e95e9bfbd..5d6e8c05b3d4 100644
--- a/net/netlink/Kconfig
+++ b/net/netlink/Kconfig
@@ -2,15 +2,6 @@
# Netlink Sockets
#
-config NETLINK_MMAP
- bool "NETLINK: mmaped IO"
- ---help---
- This option enables support for memory mapped netlink IO. This
- reduces overhead by avoiding copying data between kernel- and
- userspace.
-
- If unsure, say N.
-
config NETLINK_DIAG
tristate "NETLINK: socket monitoring interface"
default n
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 81dc1bb6e016..c8416792cce0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
dev_hold(dev);
- if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
+ if (is_vmalloc_addr(skb->head))
nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
else
nskb = skb_clone(skb, GFP_ATOMIC);
@@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk)
wake_up_interruptible(&nlk->wait);
}
-#ifdef CONFIG_NETLINK_MMAP
-static bool netlink_rx_is_mmaped(struct sock *sk)
-{
- return nlk_sk(sk)->rx_ring.pg_vec != NULL;
-}
-
-static bool netlink_tx_is_mmaped(struct sock *sk)
-{
- return nlk_sk(sk)->tx_ring.pg_vec != NULL;
-}
-
-static __pure struct page *pgvec_to_page(const void *addr)
-{
- if (is_vmalloc_addr(addr))
- return vmalloc_to_page(addr);
- else
- return virt_to_page(addr);
-}
-
-static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
-{
- unsigned int i;
-
- for (i = 0; i < len; i++) {
- if (pg_vec[i] != NULL) {
- if (is_vmalloc_addr(pg_vec[i]))
- vfree(pg_vec[i]);
- else
- free_pages((unsigned long)pg_vec[i], order);
- }
- }
- kfree(pg_vec);
-}
-
-static void *alloc_one_pg_vec_page(unsigned long order)
-{
- void *buffer;
- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
- __GFP_NOWARN | __GFP_NORETRY;
-
- buffer = (void *)__get_free_pages(gfp_flags, order);
- if (buffer != NULL)
- return buffer;
-
- buffer = vzalloc((1 << order) * PAGE_SIZE);
- if (buffer != NULL)
- return buffer;
-
- gfp_flags &= ~__GFP_NORETRY;
- return (void *)__get_free_pages(gfp_flags, order);
-}
-
-static void **alloc_pg_vec(struct netlink_sock *nlk,
- struct nl_mmap_req *req, unsigned int order)
-{
- unsigned int block_nr = req->nm_block_nr;
- unsigned int i;
- void **pg_vec;
-
- pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
- if (pg_vec == NULL)
- return NULL;
-
- for (i = 0; i < block_nr; i++) {
- pg_vec[i] = alloc_one_pg_vec_page(order);
- if (pg_vec[i] == NULL)
- goto err1;
- }
-
- return pg_vec;
-err1:
- free_pg_vec(pg_vec, order, block_nr);
- return NULL;
-}
-
-
-static void
-__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
- unsigned int order)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- struct sk_buff_head *queue;
- struct netlink_ring *ring;
-
- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-
- spin_lock_bh(&queue->lock);
-
- ring->frame_max = req->nm_frame_nr - 1;
- ring->head = 0;
- ring->frame_size = req->nm_frame_size;
- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
- swap(ring->pg_vec_len, req->nm_block_nr);
- swap(ring->pg_vec_order, order);
- swap(ring->pg_vec, pg_vec);
-
- __skb_queue_purge(queue);
- spin_unlock_bh(&queue->lock);
-
- WARN_ON(atomic_read(&nlk->mapped));
-
- if (pg_vec)
- free_pg_vec(pg_vec, order, req->nm_block_nr);
-}
-
-static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
- bool tx_ring)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- struct netlink_ring *ring;
- void **pg_vec = NULL;
- unsigned int order = 0;
-
- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-
- if (atomic_read(&nlk->mapped))
- return -EBUSY;
- if (atomic_read(&ring->pending))
- return -EBUSY;
-
- if (req->nm_block_nr) {
- if (ring->pg_vec != NULL)
- return -EBUSY;
-
- if ((int)req->nm_block_size <= 0)
- return -EINVAL;
- if (!PAGE_ALIGNED(req->nm_block_size))
- return -EINVAL;
- if (req->nm_frame_size < NL_MMAP_HDRLEN)
- return -EINVAL;
- if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
- return -EINVAL;
-
- ring->frames_per_block = req->nm_block_size /
- req->nm_frame_size;
- if (ring->frames_per_block == 0)
- return -EINVAL;
- if (ring->frames_per_block * req->nm_block_nr !=
- req->nm_frame_nr)
- return -EINVAL;
-
- order = get_order(req->nm_block_size);
- pg_vec = alloc_pg_vec(nlk, req, order);
- if (pg_vec == NULL)
- return -ENOMEM;
- } else {
- if (req->nm_frame_nr)
- return -EINVAL;
- }
-
- mutex_lock(&nlk->pg_vec_lock);
- if (atomic_read(&nlk->mapped) == 0) {
- __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
- mutex_unlock(&nlk->pg_vec_lock);
- return 0;
- }
-
- mutex_unlock(&nlk->pg_vec_lock);
-
- if (pg_vec)
- free_pg_vec(pg_vec, order, req->nm_block_nr);
-
- return -EBUSY;
-}
-
-static void netlink_mm_open(struct vm_area_struct *vma)
-{
- struct file *file = vma->vm_file;
- struct socket *sock = file->private_data;
- struct sock *sk = sock->sk;
-
- if (sk)
- atomic_inc(&nlk_sk(sk)->mapped);
-}
-
-static void netlink_mm_close(struct vm_area_struct *vma)
-{
- struct file *file = vma->vm_file;
- struct socket *sock = file->private_data;
- struct sock *sk = sock->sk;
-
- if (sk)
- atomic_dec(&nlk_sk(sk)->mapped);
-}
-
-static const struct vm_operations_struct netlink_mmap_ops = {
- .open = netlink_mm_open,
- .close = netlink_mm_close,
-};
-
-static int netlink_mmap(struct file *file, struct socket *sock,
- struct vm_area_struct *vma)
-{
- struct sock *sk = sock->sk;
- struct netlink_sock *nlk = nlk_sk(sk);
- struct netlink_ring *ring;
- unsigned long start, size, expected;
- unsigned int i;
- int err = -EINVAL;
-
- if (vma->vm_pgoff)
- return -EINVAL;
-
- mutex_lock(&nlk->pg_vec_lock);
-
- expected = 0;
- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
- if (ring->pg_vec == NULL)
- continue;
- expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
- }
-
- if (expected == 0)
- goto out;
-
- size = vma->vm_end - vma->vm_start;
- if (size != expected)
- goto out;
-
- start = vma->vm_start;
- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
- if (ring->pg_vec == NULL)
- continue;
-
- for (i = 0; i < ring->pg_vec_len; i++) {
- struct page *page;
- void *kaddr = ring->pg_vec[i];
- unsigned int pg_num;
-
- for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
- page = pgvec_to_page(kaddr);
- err = vm_insert_page(vma, start, page);
- if (err < 0)
- goto out;
- start += PAGE_SIZE;
- kaddr += PAGE_SIZE;
- }
- }
- }
-
- atomic_inc(&nlk->mapped);
- vma->vm_ops = &netlink_mmap_ops;
- err = 0;
-out:
- mutex_unlock(&nlk->pg_vec_lock);
- return err;
-}
-
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
-{
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
- struct page *p_start, *p_end;
-
- /* First page is flushed through netlink_{get,set}_status */
- p_start = pgvec_to_page(hdr + PAGE_SIZE);
- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
- while (p_start <= p_end) {
- flush_dcache_page(p_start);
- p_start++;
- }
-#endif
-}
-
-static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
-{
- smp_rmb();
- flush_dcache_page(pgvec_to_page(hdr));
- return hdr->nm_status;
-}
-
-static void netlink_set_status(struct nl_mmap_hdr *hdr,
- enum nl_mmap_status status)
-{
- smp_mb();
- hdr->nm_status = status;
- flush_dcache_page(pgvec_to_page(hdr));
-}
-
-static struct nl_mmap_hdr *
-__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
-{
- unsigned int pg_vec_pos, frame_off;
-
- pg_vec_pos = pos / ring->frames_per_block;
- frame_off = pos % ring->frames_per_block;
-
- return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
-}
-
-static struct nl_mmap_hdr *
-netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
- enum nl_mmap_status status)
-{
- struct nl_mmap_hdr *hdr;
-
- hdr = __netlink_lookup_frame(ring, pos);
- if (netlink_get_status(hdr) != status)
- return NULL;
-
- return hdr;
-}
-
-static struct nl_mmap_hdr *
-netlink_current_frame(const struct netlink_ring *ring,
- enum nl_mmap_status status)
-{
- return netlink_lookup_frame(ring, ring->head, status);
-}
-
-static void netlink_increment_head(struct netlink_ring *ring)
-{
- ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
-}
-
-static void netlink_forward_ring(struct netlink_ring *ring)
-{
- unsigned int head = ring->head;
- const struct nl_mmap_hdr *hdr;
-
- do {
- hdr = __netlink_lookup_frame(ring, ring->head);
- if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
- break;
- if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
- break;
- netlink_increment_head(ring);
- } while (ring->head != head);
-}
-
-static bool netlink_has_valid_frame(struct netlink_ring *ring)
-{
- unsigned int head = ring->head, pos = head;
- const struct nl_mmap_hdr *hdr;
-
- do {
- hdr = __netlink_lookup_frame(ring, pos);
- if (hdr->nm_status == NL_MMAP_STATUS_VALID)
- return true;
- pos = pos != 0 ? pos - 1 : ring->frame_max;
- } while (pos != head);
-
- return false;
-}
-
-static bool netlink_dump_space(struct netlink_sock *nlk)
-{
- struct netlink_ring *ring = &nlk->rx_ring;
- struct nl_mmap_hdr *hdr;
- unsigned int n;
-
- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
- if (hdr == NULL)
- return false;
-
- n = ring->head + ring->frame_max / 2;
- if (n > ring->frame_max)
- n -= ring->frame_max;
-
- hdr = __netlink_lookup_frame(ring, n);
-
- return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
-}
-
-static unsigned int netlink_poll(struct file *file, struct socket *sock,
- poll_table *wait)
-{
- struct sock *sk = sock->sk;
- struct netlink_sock *nlk = nlk_sk(sk);
- unsigned int mask;
- int err;
-
- if (nlk->rx_ring.pg_vec != NULL) {
- /* Memory mapped sockets don't call recvmsg(), so flow control
- * for dumps is performed here. A dump is allowed to continue
- * if at least half the ring is unused.
- */
- while (nlk->cb_running && netlink_dump_space(nlk)) {
- err = netlink_dump(sk);
- if (err < 0) {
- sk->sk_err = -err;
- sk->sk_error_report(sk);
- break;
- }
- }
- netlink_rcv_wake(sk);
- }
-
- mask = datagram_poll(file, sock, wait);
-
- /* We could already have received frames in the normal receive
- * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
- * so if mask contains pollin/etc already, there's no point
- * walking the ring.
- */
- if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (nlk->rx_ring.pg_vec) {
- if (netlink_has_valid_frame(&nlk->rx_ring))
- mask |= POLLIN | POLLRDNORM;
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- }
-
- spin_lock_bh(&sk->sk_write_queue.lock);
- if (nlk->tx_ring.pg_vec) {
- if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
- mask |= POLLOUT | POLLWRNORM;
- }
- spin_unlock_bh(&sk->sk_write_queue.lock);
-
- return mask;
-}
-
-static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
-{
- return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
-}
-
-static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
- struct netlink_ring *ring,
- struct nl_mmap_hdr *hdr)
-{
- unsigned int size;
- void *data;
-
- size = ring->frame_size - NL_MMAP_HDRLEN;
- data = (void *)hdr + NL_MMAP_HDRLEN;
-
- skb->head = data;
- skb->data = data;
- skb_reset_tail_pointer(skb);
- skb->end = skb->tail + size;
- skb->len = 0;
-
- skb->destructor = netlink_skb_destructor;
- NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
- NETLINK_CB(skb).sk = sk;
-}
-
-static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
- u32 dst_portid, u32 dst_group,
- struct scm_cookie *scm)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- struct netlink_ring *ring;
- struct nl_mmap_hdr *hdr;
- struct sk_buff *skb;
- unsigned int maxlen;
- int err = 0, len = 0;
-
- mutex_lock(&nlk->pg_vec_lock);
-
- ring = &nlk->tx_ring;
- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
-
- do {
- unsigned int nm_len;
-
- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
- if (hdr == NULL) {
- if (!(msg->msg_flags & MSG_DONTWAIT) &&
- atomic_read(&nlk->tx_ring.pending))
- schedule();
- continue;
- }
-
- nm_len = ACCESS_ONCE(hdr->nm_len);
- if (nm_len > maxlen) {
- err = -EINVAL;
- goto out;
- }
-
- netlink_frame_flush_dcache(hdr, nm_len);
-
- skb = alloc_skb(nm_len, GFP_KERNEL);
- if (skb == NULL) {
- err = -ENOBUFS;
- goto out;
- }
- __skb_put(skb, nm_len);
- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
-
- netlink_increment_head(ring);
-
- NETLINK_CB(skb).portid = nlk->portid;
- NETLINK_CB(skb).dst_group = dst_group;
- NETLINK_CB(skb).creds = scm->creds;
-
- err = security_netlink_send(sk, skb);
- if (err) {
- kfree_skb(skb);
- goto out;
- }
-
- if (unlikely(dst_group)) {
- atomic_inc(&skb->users);
- netlink_broadcast(sk, skb, dst_portid, dst_group,
- GFP_KERNEL);
- }
- err = netlink_unicast(sk, skb, dst_portid,
- msg->msg_flags & MSG_DONTWAIT);
- if (err < 0)
- goto out;
- len += err;
-
- } while (hdr != NULL ||
- (!(msg->msg_flags & MSG_DONTWAIT) &&
- atomic_read(&nlk->tx_ring.pending)));
-
- if (len > 0)
- err = len;
-out:
- mutex_unlock(&nlk->pg_vec_lock);
- return err;
-}
-
-static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
-{
- struct nl_mmap_hdr *hdr;
-
- hdr = netlink_mmap_hdr(skb);
- hdr->nm_len = skb->len;
- hdr->nm_group = NETLINK_CB(skb).dst_group;
- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
- netlink_frame_flush_dcache(hdr, hdr->nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
-
- NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
- kfree_skb(skb);
-}
-
-static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- struct netlink_ring *ring = &nlk->rx_ring;
- struct nl_mmap_hdr *hdr;
-
- spin_lock_bh(&sk->sk_receive_queue.lock);
- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
- if (hdr == NULL) {
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- kfree_skb(skb);
- netlink_overrun(sk);
- return;
- }
- netlink_increment_head(ring);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- spin_unlock_bh(&sk->sk_receive_queue.lock);
-
- hdr->nm_len = skb->len;
- hdr->nm_group = NETLINK_CB(skb).dst_group;
- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
- netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
-}
-
-#else /* CONFIG_NETLINK_MMAP */
-#define netlink_rx_is_mmaped(sk) false
-#define netlink_tx_is_mmaped(sk) false
-#define netlink_mmap sock_no_mmap
-#define netlink_poll datagram_poll
-#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0
-#endif /* CONFIG_NETLINK_MMAP */
-
static void netlink_skb_destructor(struct sk_buff *skb)
{
-#ifdef CONFIG_NETLINK_MMAP
- struct nl_mmap_hdr *hdr;
- struct netlink_ring *ring;
- struct sock *sk;
-
- /* If a packet from the kernel to userspace was freed because of an
- * error without being delivered to userspace, the kernel must reset
- * the status. In the direction userspace to kernel, the status is
- * always reset here after the packet was processed and freed.
- */
- if (netlink_skb_is_mmaped(skb)) {
- hdr = netlink_mmap_hdr(skb);
- sk = NETLINK_CB(skb).sk;
-
- if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
- ring = &nlk_sk(sk)->tx_ring;
- } else {
- if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
- hdr->nm_len = 0;
- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
- }
- ring = &nlk_sk(sk)->rx_ring;
- }
-
- WARN_ON(atomic_read(&ring->pending) == 0);
- atomic_dec(&ring->pending);
- sock_put(sk);
-
- skb->head = NULL;
- }
-#endif
if (is_vmalloc_addr(skb->head)) {
if (!skb->cloned ||
!atomic_dec_return(&(skb_shinfo(skb)->dataref)))
@@ -937,18 +335,6 @@ static void netlink_sock_destruct(struct sock *sk)
}
skb_queue_purge(&sk->sk_receive_queue);
-#ifdef CONFIG_NETLINK_MMAP
- if (1) {
- struct nl_mmap_req req;
-
- memset(&req, 0, sizeof(req));
- if (nlk->rx_ring.pg_vec)
- __netlink_set_ring(sk, &req, false, NULL, 0);
- memset(&req, 0, sizeof(req));
- if (nlk->tx_ring.pg_vec)
- __netlink_set_ring(sk, &req, true, NULL, 0);
- }
-#endif /* CONFIG_NETLINK_MMAP */
if (!sock_flag(sk, SOCK_DEAD)) {
printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -1194,9 +580,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
mutex_init(nlk->cb_mutex);
}
init_waitqueue_head(&nlk->wait);
-#ifdef CONFIG_NETLINK_MMAP
- mutex_init(&nlk->pg_vec_lock);
-#endif
sk->sk_destruct = netlink_sock_destruct;
sk->sk_protocol = protocol;
@@ -1728,8 +1111,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
nlk = nlk_sk(sk);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
- !netlink_skb_is_mmaped(skb)) {
+ test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
DECLARE_WAITQUEUE(wait, current);
if (!*timeo) {
if (!ssk || netlink_is_kernel(ssk))
@@ -1767,14 +1149,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
netlink_deliver_tap(skb);
-#ifdef CONFIG_NETLINK_MMAP
- if (netlink_skb_is_mmaped(skb))
- netlink_queue_mmaped_skb(sk, skb);
- else if (netlink_rx_is_mmaped(sk))
- netlink_ring_set_copied(sk, skb);
- else
-#endif /* CONFIG_NETLINK_MMAP */
- skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk);
return len;
}
@@ -1798,9 +1173,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
int delta;
WARN_ON(skb->sk != NULL);
- if (netlink_skb_is_mmaped(skb))
- return skb;
-
delta = skb->end - skb->tail;
if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
return skb;
@@ -1876,79 +1248,6 @@ retry:
}
EXPORT_SYMBOL(netlink_unicast);
-struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
- unsigned int ldiff, u32 dst_portid,
- gfp_t gfp_mask)
-{
-#ifdef CONFIG_NETLINK_MMAP
- unsigned int maxlen, linear_size;
- struct sock *sk = NULL;
- struct sk_buff *skb;
- struct netlink_ring *ring;
- struct nl_mmap_hdr *hdr;
-
- sk = netlink_getsockbyportid(ssk, dst_portid);
- if (IS_ERR(sk))
- goto out;
-
- ring = &nlk_sk(sk)->rx_ring;
- /* fast-path without atomic ops for common case: non-mmaped receiver */
- if (ring->pg_vec == NULL)
- goto out_put;
-
- /* We need to account the full linear size needed as a ring
- * slot cannot have non-linear parts.
- */
- linear_size = size + ldiff;
- if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
- goto out_put;
-
- skb = alloc_skb_head(gfp_mask);
- if (skb == NULL)
- goto err1;
-
- spin_lock_bh(&sk->sk_receive_queue.lock);
- /* check again under lock */
- if (ring->pg_vec == NULL)
- goto out_free;
-
- /* check again under lock */
- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
- if (maxlen < linear_size)
- goto out_free;
-
- netlink_forward_ring(ring);
- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
- if (hdr == NULL)
- goto err2;
-
- netlink_ring_setup_skb(skb, sk, ring, hdr);
- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
- atomic_inc(&ring->pending);
- netlink_increment_head(ring);
-
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- return skb;
-
-err2:
- kfree_skb(skb);
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- netlink_overrun(sk);
-err1:
- sock_put(sk);
- return NULL;
-
-out_free:
- kfree_skb(skb);
- spin_unlock_bh(&sk->sk_receive_queue.lock);
-out_put:
- sock_put(sk);
-out:
-#endif
- return alloc_skb(size, gfp_mask);
-}
-EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
-
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
int res = 0;
@@ -2225,8 +1524,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
if (level != SOL_NETLINK)
return -ENOPROTOOPT;
- if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
- optlen >= sizeof(int) &&
+ if (optlen >= sizeof(int) &&
get_user(val, (unsigned int __user *)optval))
return -EFAULT;
@@ -2279,25 +1577,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
}
err = 0;
break;
-#ifdef CONFIG_NETLINK_MMAP
- case NETLINK_RX_RING:
- case NETLINK_TX_RING: {
- struct nl_mmap_req req;
-
- /* Rings might consume more memory than queue limits, require
- * CAP_NET_ADMIN.
- */
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- if (optlen < sizeof(req))
- return -EINVAL;
- if (copy_from_user(&req, optval, sizeof(req)))
- return -EFAULT;
- err = netlink_set_ring(sk, &req,
- optname == NETLINK_TX_RING);
- break;
- }
-#endif /* CONFIG_NETLINK_MMAP */
case NETLINK_LISTEN_ALL_NSID:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
return -EPERM;
@@ -2467,18 +1746,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
smp_rmb();
}
- /* It's a really convoluted way for userland to ask for mmaped
- * sendmsg(), but that's what we've got...
- */
- if (netlink_tx_is_mmaped(sk) &&
- iter_is_iovec(&msg->msg_iter) &&
- msg->msg_iter.nr_segs == 1 &&
- msg->msg_iter.iov->iov_base == NULL) {
- err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
- &scm);
- goto out;
- }
-
err = -EMSGSIZE;
if (len > sk->sk_sndbuf - 32)
goto out;
@@ -2794,8 +2061,7 @@ static int netlink_dump(struct sock *sk)
goto errout_skb;
}
- if (!netlink_rx_is_mmaped(sk) &&
- atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
goto errout_skb;
/* NLMSG_GOODSIZE is small to avoid high order allocations being
@@ -2808,15 +2074,12 @@ static int netlink_dump(struct sock *sk)
if (alloc_min_size < nlk->max_recvmsg_len) {
alloc_size = nlk->max_recvmsg_len;
- skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
- GFP_KERNEL |
- __GFP_NOWARN |
- __GFP_NORETRY);
+ skb = alloc_skb(alloc_size, GFP_KERNEL |
+ __GFP_NOWARN | __GFP_NORETRY);
}
if (!skb) {
alloc_size = alloc_min_size;
- skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
- GFP_KERNEL);
+ skb = alloc_skb(alloc_size, GFP_KERNEL);
}
if (!skb)
goto errout_skb;
@@ -2883,16 +2146,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
struct netlink_sock *nlk;
int ret;
- /* Memory mapped dump requests need to be copied to avoid looping
- * on the pending state in netlink_mmap_sendmsg() while the CB hold
- * a reference to the skb.
- */
- if (netlink_skb_is_mmaped(skb)) {
- skb = skb_copy(skb, GFP_KERNEL);
- if (skb == NULL)
- return -ENOBUFS;
- } else
- atomic_inc(&skb->users);
+ atomic_inc(&skb->users);
sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
if (sk == NULL) {
@@ -2965,8 +2219,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
payload += nlmsg_len(nlh);
- skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
- NETLINK_CB(in_skb).portid, GFP_KERNEL);
+ skb = nlmsg_new(payload, GFP_KERNEL);
if (!skb) {
struct sock *sk;
@@ -3240,7 +2493,7 @@ static const struct proto_ops netlink_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
- .poll = netlink_poll,
+ .poll = datagram_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -3248,7 +2501,7 @@ static const struct proto_ops netlink_ops = {
.getsockopt = netlink_getsockopt,
.sendmsg = netlink_sendmsg,
.recvmsg = netlink_recvmsg,
- .mmap = netlink_mmap,
+ .mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 14437d9b1965..e68ef9ccd703 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -44,12 +44,6 @@ struct netlink_sock {
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
struct module *module;
-#ifdef CONFIG_NETLINK_MMAP
- struct mutex pg_vec_lock;
- struct netlink_ring rx_ring;
- struct netlink_ring tx_ring;
- atomic_t mapped;
-#endif /* CONFIG_NETLINK_MMAP */
struct rhash_head node;
struct rcu_head rcu;
@@ -60,15 +54,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
return container_of(sk, struct netlink_sock, sk);
}
-static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
-{
-#ifdef CONFIG_NETLINK_MMAP
- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
-#else
- return false;
-#endif /* CONFIG_NETLINK_MMAP */
-}
-
struct netlink_table {
struct rhashtable hash;
struct hlist_head mc_list;
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 3ee63a3cff30..8dd836a8dd60 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -8,41 +8,6 @@
#include "af_netlink.h"
-#ifdef CONFIG_NETLINK_MMAP
-static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
- struct sk_buff *nlskb)
-{
- struct netlink_diag_ring ndr;
-
- ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
- ndr.ndr_block_nr = ring->pg_vec_len;
- ndr.ndr_frame_size = ring->frame_size;
- ndr.ndr_frame_nr = ring->frame_max + 1;
-
- return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
-}
-
-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- int ret;
-
- mutex_lock(&nlk->pg_vec_lock);
- ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
- if (!ret)
- ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
- nlskb);
- mutex_unlock(&nlk->pg_vec_lock);
-
- return ret;
-}
-#else
-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
-{
- return 0;
-}
-#endif
-
static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
{
struct netlink_sock *nlk = nlk_sk(sk);
@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
goto out_nlmsg_trim;
- if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
- sk_diag_put_rings_cfg(sk, skb))
- goto out_nlmsg_trim;
-
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d3f6b063467b..a09132a69869 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -185,7 +185,7 @@ static int genl_allocate_reserve_groups(int n_groups, int *first_id)
}
}
- if (id + n_groups >= mc_groups_longs * BITS_PER_LONG) {
+ if (id + n_groups > mc_groups_longs * BITS_PER_LONG) {
unsigned long new_longs = mc_groups_longs +
BITS_TO_LONGS(n_groups);
size_t nlen = new_longs * sizeof(unsigned long);
@@ -463,26 +463,6 @@ int genl_unregister_family(struct genl_family *family)
EXPORT_SYMBOL(genl_unregister_family);
/**
- * genlmsg_new_unicast - Allocate generic netlink message for unicast
- * @payload: size of the message payload
- * @info: information on destination
- * @flags: the type of memory to allocate
- *
- * Allocates a new sk_buff large enough to cover the specified payload
- * plus required Netlink headers. Will check receiving socket for
- * memory mapped i/o capability and use it if enabled. Will fall back
- * to non-mapped skb if message size exceeds the frame size of the ring.
- */
-struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info,
- gfp_t flags)
-{
- size_t len = nlmsg_total_size(genlmsg_total_size(payload));
-
- return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags);
-}
-EXPORT_SYMBOL_GPL(genlmsg_new_unicast);
-
-/**
* genlmsg_put - Add generic netlink header to netlink message
* @skb: socket buffer holding the message
* @portid: netlink portid the message is addressed to
@@ -580,6 +560,10 @@ static int genl_family_rcv_msg(struct genl_family *family,
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
+ if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
int rc;
@@ -638,7 +622,6 @@ static int genl_family_rcv_msg(struct genl_family *family,
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
- info.dst_sk = skb->sk;
genl_info_net_set(&info, net);
memset(&info.user_ptr, 0, sizeof(info.user_ptr));
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index d143aa9f6654..cd5fd9d728a7 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -10,6 +10,7 @@ config OPENVSWITCH
select LIBCRC32C
select MPLS
select NET_MPLS_GSO
+ select DST_CACHE
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c88d0f2d3e01..e9dd47b2a85b 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
new_mpls_lse = (__be32 *)skb_mpls_header(skb);
*new_mpls_lse = mpls->mpls_lse;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
- MPLS_HLEN, 0));
+ skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
hdr = eth_hdr(skb);
hdr->h_proto = mpls->mpls_ethertype;
@@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
mask->eth_dst);
- ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+ skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
@@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
/* Reconstruct the MAC header. */
skb_push(skb, data->l2_len);
memcpy(skb->data, &data->l2_data, data->l2_len);
- ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
+ skb_postpush_rcsum(skb, skb->data, data->l2_len);
skb_reset_mac_header(skb);
ovs_vport_send(vport, skb);
@@ -1160,17 +1158,26 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_actions *acts,
struct sw_flow_key *key)
{
- int level = this_cpu_read(exec_actions_level);
- int err;
+ static const int ovs_recursion_limit = 5;
+ int err, level;
+
+ level = __this_cpu_inc_return(exec_actions_level);
+ if (unlikely(level > ovs_recursion_limit)) {
+ net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
+ ovs_dp_name(dp));
+ kfree_skb(skb);
+ err = -ENETDOWN;
+ goto out;
+ }
- this_cpu_inc(exec_actions_level);
err = do_execute_actions(dp, skb, key,
acts->actions, acts->actions_len);
- if (!level)
+ if (level == 1)
process_deferred_actions(dp);
- this_cpu_dec(exec_actions_level);
+out:
+ __this_cpu_dec(exec_actions_level);
return err;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 91a8b004dc51..e6a7d494df24 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -336,12 +336,10 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
unsigned short gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
- struct ovs_skb_cb ovs_cb;
int err;
- ovs_cb = *OVS_CB(skb);
+ BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
- *OVS_CB(skb) = ovs_cb;
if (IS_ERR(segs))
return PTR_ERR(segs);
if (segs == NULL)
@@ -359,7 +357,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
/* Queue all of the segments. */
skb = segs;
do {
- *OVS_CB(skb) = ovs_cb;
if (gso_type & SKB_GSO_UDP && skb != segs)
key = &later_key;
@@ -425,10 +422,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
struct sk_buff *nskb = NULL;
struct sk_buff *user_skb = NULL; /* to be queued to userspace */
struct nlattr *nla;
- struct genl_info info = {
- .dst_sk = ovs_dp_get_net(dp)->genl_sock,
- .snd_portid = upcall_info->portid,
- };
size_t len;
unsigned int hlen;
int err, dp_ifindex;
@@ -469,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
hlen = skb->len;
len = upcall_msg_size(upcall_info, hlen);
- user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
+ user_skb = genlmsg_new(len, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
goto out;
@@ -657,7 +650,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
static const struct genl_ops dp_packet_genl_ops[] = {
{ .cmd = OVS_PACKET_CMD_EXECUTE,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = packet_policy,
.doit = ovs_packet_cmd_execute
}
@@ -879,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
return NULL;
len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
- skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
+ skb = genlmsg_new(len, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
@@ -1394,12 +1387,12 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
static const struct genl_ops dp_flow_genl_ops[] = {
{ .cmd = OVS_FLOW_CMD_NEW,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
.doit = ovs_flow_cmd_new
},
{ .cmd = OVS_FLOW_CMD_DEL,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
.doit = ovs_flow_cmd_del
},
@@ -1410,7 +1403,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
.dumpit = ovs_flow_cmd_dump
},
{ .cmd = OVS_FLOW_CMD_SET,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
.doit = ovs_flow_cmd_set,
},
@@ -1484,9 +1477,9 @@ error:
return -EMSGSIZE;
}
-static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
+static struct sk_buff *ovs_dp_cmd_alloc_info(void)
{
- return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
+ return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
}
/* Called with rcu_read_lock or ovs_mutex. */
@@ -1539,7 +1532,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
goto err;
- reply = ovs_dp_cmd_alloc_info(info);
+ reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
@@ -1660,7 +1653,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- reply = ovs_dp_cmd_alloc_info(info);
+ reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
@@ -1693,7 +1686,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- reply = ovs_dp_cmd_alloc_info(info);
+ reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
@@ -1726,7 +1719,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- reply = ovs_dp_cmd_alloc_info(info);
+ reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
@@ -1780,12 +1773,12 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
static const struct genl_ops dp_datapath_genl_ops[] = {
{ .cmd = OVS_DP_CMD_NEW,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
.doit = ovs_dp_cmd_new
},
{ .cmd = OVS_DP_CMD_DEL,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
.doit = ovs_dp_cmd_del
},
@@ -1796,7 +1789,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
.dumpit = ovs_dp_cmd_dump
},
{ .cmd = OVS_DP_CMD_SET,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
.doit = ovs_dp_cmd_set,
},
@@ -1915,6 +1908,29 @@ static struct vport *lookup_vport(struct net *net,
return ERR_PTR(-EINVAL);
}
+/* Called with ovs_mutex */
+static void update_headroom(struct datapath *dp)
+{
+ unsigned dev_headroom, max_headroom = 0;
+ struct net_device *dev;
+ struct vport *vport;
+ int i;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+ dev = vport->dev;
+ dev_headroom = netdev_get_fwd_headroom(dev);
+ if (dev_headroom > max_headroom)
+ max_headroom = dev_headroom;
+ }
+ }
+
+ dp->max_headroom = max_headroom;
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
+ netdev_set_rx_headroom(vport->dev, max_headroom);
+}
+
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@ -1980,6 +1996,12 @@ restart:
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+
+ if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
+ update_headroom(dp);
+ else
+ netdev_set_rx_headroom(vport->dev, dp->max_headroom);
+
BUG_ON(err < 0);
ovs_unlock();
@@ -2046,8 +2068,10 @@ exit_unlock_free:
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
+ bool must_update_headroom = false;
struct nlattr **a = info->attrs;
struct sk_buff *reply;
+ struct datapath *dp;
struct vport *vport;
int err;
@@ -2069,7 +2093,16 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_DEL);
BUG_ON(err < 0);
+
+ /* the vport deletion may trigger dp headroom update */
+ dp = vport->dp;
+ if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
+ must_update_headroom = true;
+ netdev_reset_rx_headroom(vport->dev);
ovs_dp_detach_port(vport);
+
+ if (must_update_headroom)
+ update_headroom(dp);
ovs_unlock();
ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2161,12 +2194,12 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
static const struct genl_ops dp_vport_genl_ops[] = {
{ .cmd = OVS_VPORT_CMD_NEW,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = vport_policy,
.doit = ovs_vport_cmd_new
},
{ .cmd = OVS_VPORT_CMD_DEL,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = vport_policy,
.doit = ovs_vport_cmd_del
},
@@ -2177,7 +2210,7 @@ static const struct genl_ops dp_vport_genl_ops[] = {
.dumpit = ovs_vport_cmd_dump
},
{ .cmd = OVS_VPORT_CMD_SET,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = vport_policy,
.doit = ovs_vport_cmd_set,
},
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 67bdecd9fdc1..427e39a045cf 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -68,6 +68,8 @@ struct dp_stats_percpu {
* ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
* @net: Reference to net namespace.
+ * @max_headroom: the maximum headroom of all vports in this datapath; it will
+ * be used by all the internal vports in this dp.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
@@ -89,6 +91,8 @@ struct datapath {
possible_net_t net;
u32 user_features;
+
+ u32 max_headroom;
};
/**
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d1bd4a45ca2d..58b8efc23668 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1959,6 +1959,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (!tun_dst)
return -ENOMEM;
+ err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
+ if (err) {
+ dst_release((struct dst_entry *)tun_dst);
+ return err;
+ }
+
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
sizeof(*ovs_tun), log);
if (IS_ERR(a)) {
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index ec76398a792f..83a5534abd31 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -138,6 +138,11 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
return stats;
}
+void internal_set_rx_headroom(struct net_device *dev, int new_hr)
+{
+ dev->needed_headroom = new_hr;
+}
+
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
@@ -145,6 +150,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
.ndo_get_stats64 = internal_get_stats,
+ .ndo_set_rx_headroom = internal_set_rx_headroom,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -158,7 +164,8 @@ static void do_setup(struct net_device *netdev)
netdev->netdev_ops = &internal_dev_netdev_ops;
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
- netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
+ IFF_PHONY_HEADROOM;
netdev->destructor = internal_dev_destructor;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
@@ -199,6 +206,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_netdev;
}
+ vport->dev->needed_headroom = vport->dp->max_headroom;
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(vport->dev);
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6a6adf314363..4e3972344aa6 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb)
return;
skb_push(skb, ETH_HLEN);
- ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+ skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
return;
error:
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 1605691d9414..5eb7694348b5 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -90,7 +90,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
int err;
struct vxlan_config conf = {
.no_share = true,
- .flags = VXLAN_F_COLLECT_METADATA,
+ .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
+ /* Don't restrict the packets that can be sent by MTU */
+ .mtu = IP_MAX_MTU,
};
if (!options) {
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index c10899cb9040..f01f28a567ad 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -185,13 +185,6 @@ static inline struct vport *vport_from_priv(void *priv)
int ovs_vport_receive(struct vport *, struct sk_buff *,
const struct ip_tunnel_info *);
-static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
- const void *start, unsigned int len)
-{
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
-}
-
static inline const char *ovs_vport_name(struct vport *vport)
{
return vport->dev->name;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 992396aa635c..d41b1074cb2d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -557,9 +557,8 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
{
struct net_device *dev;
unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
- struct ethtool_cmd ecmd;
+ struct ethtool_link_ksettings ecmd;
int err;
- u32 speed;
rtnl_lock();
dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
@@ -567,19 +566,19 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
rtnl_unlock();
return DEFAULT_PRB_RETIRE_TOV;
}
- err = __ethtool_get_settings(dev, &ecmd);
- speed = ethtool_cmd_speed(&ecmd);
+ err = __ethtool_get_link_ksettings(dev, &ecmd);
rtnl_unlock();
if (!err) {
/*
* If the link speed is so slow you don't really
* need to worry about perf anyways
*/
- if (speed < SPEED_1000 || speed == SPEED_UNKNOWN) {
+ if (ecmd.base.speed < SPEED_1000 ||
+ ecmd.base.speed == SPEED_UNKNOWN) {
return DEFAULT_PRB_RETIRE_TOV;
} else {
msec = 1;
- div = speed / 1000;
+ div = ecmd.base.speed / 1000;
}
}
@@ -1960,6 +1959,64 @@ static unsigned int run_filter(struct sk_buff *skb,
return res;
}
+static int __packet_rcv_vnet(const struct sk_buff *skb,
+ struct virtio_net_hdr *vnet_hdr)
+{
+ *vnet_hdr = (const struct virtio_net_hdr) { 0 };
+
+ if (skb_is_gso(skb)) {
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+ /* This is a hint as to how much should be linear. */
+ vnet_hdr->hdr_len =
+ __cpu_to_virtio16(vio_le(), skb_headlen(skb));
+ vnet_hdr->gso_size =
+ __cpu_to_virtio16(vio_le(), sinfo->gso_size);
+
+ if (sinfo->gso_type & SKB_GSO_TCPV4)
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ else if (sinfo->gso_type & SKB_GSO_TCPV6)
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ else if (sinfo->gso_type & SKB_GSO_UDP)
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ else if (sinfo->gso_type & SKB_GSO_FCOE)
+ return -EINVAL;
+ else
+ BUG();
+
+ if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+ vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+ } else
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = __cpu_to_virtio16(vio_le(),
+ skb_checksum_start_offset(skb));
+ vnet_hdr->csum_offset = __cpu_to_virtio16(vio_le(),
+ skb->csum_offset);
+ } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+ } /* else everything is zero */
+
+ return 0;
+}
+
+static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
+ size_t *len)
+{
+ struct virtio_net_hdr vnet_hdr;
+
+ if (*len < sizeof(vnet_hdr))
+ return -EINVAL;
+ *len -= sizeof(vnet_hdr);
+
+ if (__packet_rcv_vnet(skb, &vnet_hdr))
+ return -EINVAL;
+
+ return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+}
+
/*
* This function makes lazy skb cloning in hope that most of packets
* are discarded by BPF.
@@ -2148,7 +2205,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
unsigned int maclen = skb_network_offset(skb);
netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) +
- po->tp_reserve;
+ po->tp_reserve;
+ if (po->has_vnet_hdr)
+ netoff += sizeof(struct virtio_net_hdr);
macoff = netoff - maclen;
}
if (po->tp_version <= TPACKET_V2) {
@@ -2185,7 +2244,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
h.raw = packet_current_rx_frame(po, skb,
TP_STATUS_KERNEL, (macoff+snaplen));
if (!h.raw)
- goto ring_is_full;
+ goto drop_n_account;
if (po->tp_version <= TPACKET_V2) {
packet_increment_rx_head(po, &po->rx_ring);
/*
@@ -2204,6 +2263,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
spin_unlock(&sk->sk_receive_queue.lock);
+ if (po->has_vnet_hdr) {
+ if (__packet_rcv_vnet(skb, h.raw + macoff -
+ sizeof(struct virtio_net_hdr))) {
+ spin_lock(&sk->sk_receive_queue.lock);
+ goto drop_n_account;
+ }
+ }
+
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
@@ -2299,7 +2366,7 @@ drop:
kfree_skb(skb);
return 0;
-ring_is_full:
+drop_n_account:
po->stats.stats1.tp_drops++;
spin_unlock(&sk->sk_receive_queue.lock);
@@ -2347,15 +2414,92 @@ static void tpacket_set_protocol(const struct net_device *dev,
}
}
+static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
+{
+ unsigned short gso_type = 0;
+
+ if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
+ (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
+ __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
+ __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
+ vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
+ __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
+ __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
+
+ if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
+ return -EINVAL;
+
+ if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ gso_type = SKB_GSO_TCPV4;
+ break;
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ gso_type = SKB_GSO_TCPV6;
+ break;
+ case VIRTIO_NET_HDR_GSO_UDP:
+ gso_type = SKB_GSO_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
+ gso_type |= SKB_GSO_TCP_ECN;
+
+ if (vnet_hdr->gso_size == 0)
+ return -EINVAL;
+ }
+
+ vnet_hdr->gso_type = gso_type; /* changes type, temporary storage */
+ return 0;
+}
+
+static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
+ struct virtio_net_hdr *vnet_hdr)
+{
+ int n;
+
+ if (*len < sizeof(*vnet_hdr))
+ return -EINVAL;
+ *len -= sizeof(*vnet_hdr);
+
+ n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter);
+ if (n != sizeof(*vnet_hdr))
+ return -EFAULT;
+
+ return __packet_snd_vnet_parse(vnet_hdr, *len);
+}
+
+static int packet_snd_vnet_gso(struct sk_buff *skb,
+ struct virtio_net_hdr *vnet_hdr)
+{
+ if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+ u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start);
+ u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset);
+
+ if (!skb_partial_csum_set(skb, s, o))
+ return -EINVAL;
+ }
+
+ skb_shinfo(skb)->gso_size =
+ __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size);
+ skb_shinfo(skb)->gso_type = vnet_hdr->gso_type;
+
+ /* Header must be checked, and gso_segs computed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+ return 0;
+}
+
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
- void *frame, struct net_device *dev, int size_max,
- __be16 proto, unsigned char *addr, int hlen)
+ void *frame, struct net_device *dev, void *data, int tp_len,
+ __be16 proto, unsigned char *addr, int hlen, int copylen)
{
union tpacket_uhdr ph;
- int to_write, offset, len, tp_len, nr_frags, len_max;
+ int to_write, offset, len, nr_frags, len_max;
struct socket *sock = po->sk.sk_socket;
struct page *page;
- void *data;
int err;
ph.raw = frame;
@@ -2367,51 +2511,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
skb_shinfo(skb)->destructor_arg = ph.raw;
- switch (po->tp_version) {
- case TPACKET_V2:
- tp_len = ph.h2->tp_len;
- break;
- default:
- tp_len = ph.h1->tp_len;
- break;
- }
- if (unlikely(tp_len > size_max)) {
- pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
- return -EMSGSIZE;
- }
-
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
- if (unlikely(po->tp_tx_has_off)) {
- int off_min, off_max, off;
- off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
- off_max = po->tx_ring.frame_size - tp_len;
- if (sock->type == SOCK_DGRAM) {
- switch (po->tp_version) {
- case TPACKET_V2:
- off = ph.h2->tp_net;
- break;
- default:
- off = ph.h1->tp_net;
- break;
- }
- } else {
- switch (po->tp_version) {
- case TPACKET_V2:
- off = ph.h2->tp_mac;
- break;
- default:
- off = ph.h1->tp_mac;
- break;
- }
- }
- if (unlikely((off < off_min) || (off_max < off)))
- return -EINVAL;
- data = ph.raw + off;
- } else {
- data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
- }
to_write = tp_len;
if (sock->type == SOCK_DGRAM) {
@@ -2419,20 +2521,17 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
NULL, tp_len);
if (unlikely(err < 0))
return -EINVAL;
- } else if (dev->hard_header_len) {
- if (ll_header_truncated(dev, tp_len))
- return -EINVAL;
-
+ } else if (copylen) {
skb_push(skb, dev->hard_header_len);
- err = skb_store_bits(skb, 0, data,
- dev->hard_header_len);
+ skb_put(skb, copylen - dev->hard_header_len);
+ err = skb_store_bits(skb, 0, data, copylen);
if (unlikely(err))
return err;
if (!skb->protocol)
tpacket_set_protocol(dev, skb);
- data += dev->hard_header_len;
- to_write -= dev->hard_header_len;
+ data += copylen;
+ to_write -= copylen;
}
offset = offset_in_page(data);
@@ -2469,10 +2568,66 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
return tp_len;
}
+static int tpacket_parse_header(struct packet_sock *po, void *frame,
+ int size_max, void **data)
+{
+ union tpacket_uhdr ph;
+ int tp_len, off;
+
+ ph.raw = frame;
+
+ switch (po->tp_version) {
+ case TPACKET_V2:
+ tp_len = ph.h2->tp_len;
+ break;
+ default:
+ tp_len = ph.h1->tp_len;
+ break;
+ }
+ if (unlikely(tp_len > size_max)) {
+ pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
+ return -EMSGSIZE;
+ }
+
+ if (unlikely(po->tp_tx_has_off)) {
+ int off_min, off_max;
+
+ off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+ off_max = po->tx_ring.frame_size - tp_len;
+ if (po->sk.sk_type == SOCK_DGRAM) {
+ switch (po->tp_version) {
+ case TPACKET_V2:
+ off = ph.h2->tp_net;
+ break;
+ default:
+ off = ph.h1->tp_net;
+ break;
+ }
+ } else {
+ switch (po->tp_version) {
+ case TPACKET_V2:
+ off = ph.h2->tp_mac;
+ break;
+ default:
+ off = ph.h1->tp_mac;
+ break;
+ }
+ }
+ if (unlikely((off < off_min) || (off_max < off)))
+ return -EINVAL;
+ } else {
+ off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+ }
+
+ *data = frame + off;
+ return tp_len;
+}
+
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
struct sk_buff *skb;
struct net_device *dev;
+ struct virtio_net_hdr *vnet_hdr = NULL;
__be16 proto;
int err, reserve = 0;
void *ph;
@@ -2480,9 +2635,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
int tp_len, size_max;
unsigned char *addr;
+ void *data;
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
- int hlen, tlen;
+ int hlen, tlen, copylen = 0;
mutex_lock(&po->pg_vec_lock);
@@ -2515,7 +2671,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
- if (size_max > dev->mtu + reserve + VLAN_HLEN)
+ if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
size_max = dev->mtu + reserve + VLAN_HLEN;
do {
@@ -2527,11 +2683,36 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
continue;
}
+ skb = NULL;
+ tp_len = tpacket_parse_header(po, ph, size_max, &data);
+ if (tp_len < 0)
+ goto tpacket_error;
+
status = TP_STATUS_SEND_REQUEST;
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
+ if (po->has_vnet_hdr) {
+ vnet_hdr = data;
+ data += sizeof(*vnet_hdr);
+ tp_len -= sizeof(*vnet_hdr);
+ if (tp_len < 0 ||
+ __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+ copylen = __virtio16_to_cpu(vio_le(),
+ vnet_hdr->hdr_len);
+ }
+ if (dev->hard_header_len) {
+ if (ll_header_truncated(dev, tp_len)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+ copylen = max_t(int, copylen, dev->hard_header_len);
+ }
skb = sock_alloc_send_skb(&po->sk,
- hlen + tlen + sizeof(struct sockaddr_ll),
+ hlen + tlen + sizeof(struct sockaddr_ll) +
+ (copylen - dev->hard_header_len),
!need_wait, &err);
if (unlikely(skb == NULL)) {
@@ -2540,14 +2721,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
err = len_sum;
goto out_status;
}
- tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
- addr, hlen);
+ tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
+ addr, hlen, copylen);
if (likely(tp_len >= 0) &&
tp_len > dev->mtu + reserve &&
+ !po->has_vnet_hdr &&
!packet_extra_vlan_len_allowed(dev, skb))
tp_len = -EMSGSIZE;
if (unlikely(tp_len < 0)) {
+tpacket_error:
if (po->tp_loss) {
__packet_set_status(po, ph,
TP_STATUS_AVAILABLE);
@@ -2561,6 +2744,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
}
+ if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+
packet_pick_tx_queue(dev, skb);
skb->destructor = tpacket_destruct_skb;
@@ -2643,12 +2831,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct sockcm_cookie sockc;
struct virtio_net_hdr vnet_hdr = { 0 };
int offset = 0;
- int vnet_hdr_len;
struct packet_sock *po = pkt_sk(sk);
- unsigned short gso_type = 0;
int hlen, tlen;
int extra_len = 0;
- ssize_t n;
/*
* Get and verify the address.
@@ -2686,53 +2871,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (sock->type == SOCK_RAW)
reserve = dev->hard_header_len;
if (po->has_vnet_hdr) {
- vnet_hdr_len = sizeof(vnet_hdr);
-
- err = -EINVAL;
- if (len < vnet_hdr_len)
- goto out_unlock;
-
- len -= vnet_hdr_len;
-
- err = -EFAULT;
- n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &msg->msg_iter);
- if (n != vnet_hdr_len)
- goto out_unlock;
-
- if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
- (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
- __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 >
- __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len)))
- vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(),
- __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
- __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2);
-
- err = -EINVAL;
- if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len)
+ err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
+ if (err)
goto out_unlock;
-
- if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
- switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
- case VIRTIO_NET_HDR_GSO_TCPV4:
- gso_type = SKB_GSO_TCPV4;
- break;
- case VIRTIO_NET_HDR_GSO_TCPV6:
- gso_type = SKB_GSO_TCPV6;
- break;
- case VIRTIO_NET_HDR_GSO_UDP:
- gso_type = SKB_GSO_UDP;
- break;
- default:
- goto out_unlock;
- }
-
- if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
- gso_type |= SKB_GSO_TCP_ECN;
-
- if (vnet_hdr.gso_size == 0)
- goto out_unlock;
-
- }
}
if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2744,7 +2885,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
}
err = -EMSGSIZE;
- if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
+ if (!vnet_hdr.gso_type &&
+ (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
goto out_unlock;
err = -ENOBUFS;
@@ -2775,7 +2917,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+ if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
err = -EMSGSIZE;
goto out_free;
@@ -2789,24 +2931,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
packet_pick_tx_queue(dev, skb);
if (po->has_vnet_hdr) {
- if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start);
- u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset);
- if (!skb_partial_csum_set(skb, s, o)) {
- err = -EINVAL;
- goto out_free;
- }
- }
-
- skb_shinfo(skb)->gso_size =
- __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size);
- skb_shinfo(skb)->gso_type = gso_type;
-
- /* Header must be checked, and gso_segs computed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
-
- len += vnet_hdr_len;
+ err = packet_snd_vnet_gso(skb, &vnet_hdr);
+ if (err)
+ goto out_free;
+ len += sizeof(vnet_hdr);
}
skb_probe_transport_header(skb, reserve);
@@ -3177,51 +3305,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
packet_rcv_has_room(pkt_sk(sk), NULL);
if (pkt_sk(sk)->has_vnet_hdr) {
- struct virtio_net_hdr vnet_hdr = { 0 };
-
- err = -EINVAL;
- vnet_hdr_len = sizeof(vnet_hdr);
- if (len < vnet_hdr_len)
- goto out_free;
-
- len -= vnet_hdr_len;
-
- if (skb_is_gso(skb)) {
- struct skb_shared_info *sinfo = skb_shinfo(skb);
-
- /* This is a hint as to how much should be linear. */
- vnet_hdr.hdr_len =
- __cpu_to_virtio16(vio_le(), skb_headlen(skb));
- vnet_hdr.gso_size =
- __cpu_to_virtio16(vio_le(), sinfo->gso_size);
- if (sinfo->gso_type & SKB_GSO_TCPV4)
- vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- else if (sinfo->gso_type & SKB_GSO_TCPV6)
- vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- else if (sinfo->gso_type & SKB_GSO_UDP)
- vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
- else if (sinfo->gso_type & SKB_GSO_FCOE)
- goto out_free;
- else
- BUG();
- if (sinfo->gso_type & SKB_GSO_TCP_ECN)
- vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
- } else
- vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(),
- skb_checksum_start_offset(skb));
- vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(),
- skb->csum_offset);
- } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
- } /* else everything is zero */
-
- err = memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_len);
- if (err < 0)
+ err = packet_rcv_vnet(msg, skb, &len);
+ if (err)
goto out_free;
+ vnet_hdr_len = sizeof(struct virtio_net_hdr);
}
/* You lose any data beyond the buffer you gave. If it worries
@@ -3552,8 +3639,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
}
if (optlen < len)
return -EINVAL;
- if (pkt_sk(sk)->has_vnet_hdr)
- return -EINVAL;
if (copy_from_user(&req_u.req, optval, len))
return -EFAULT;
return packet_set_ring(sk, &req_u, 0,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index d575ef4e9aa6..ffd5f2297584 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -140,13 +140,15 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
rcu_read_unlock();
}
-void pn_sock_hash(struct sock *sk)
+int pn_sock_hash(struct sock *sk)
{
struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
mutex_lock(&pnsocks.lock);
sk_add_node_rcu(sk, hlist);
mutex_unlock(&pnsocks.lock);
+
+ return 0;
}
EXPORT_SYMBOL(pn_sock_hash);
@@ -200,7 +202,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
pn->resource = spn->spn_resource;
/* Enable RX on the socket */
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
out_port:
mutex_unlock(&port_mutex);
out:
diff --git a/net/rds/ib.c b/net/rds/ib.c
index f222885ac0c7..9481d55ff6cb 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -122,44 +122,34 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
static void rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
- struct ib_device_attr *dev_attr;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
return;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- rdsdebug("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
ibdev_to_node(device));
if (!rds_ibdev)
- goto free_attr;
+ return;
spin_lock_init(&rds_ibdev->spinlock);
atomic_set(&rds_ibdev->refcount, 1);
INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
- rds_ibdev->max_wrs = dev_attr->max_qp_wr;
- rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
+ rds_ibdev->max_wrs = device->attrs.max_qp_wr;
+ rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
- rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
- rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
- min_t(unsigned int, (dev_attr->max_mr / 2),
+ rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
+ rds_ibdev->max_1m_fmrs = device->attrs.max_mr ?
+ min_t(unsigned int, (device->attrs.max_mr / 2),
rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
- rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
- min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
+ rds_ibdev->max_8k_fmrs = device->attrs.max_mr ?
+ min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
- rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
- rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
+ rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
+ rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
rds_ibdev->dev = device;
rds_ibdev->pd = ib_alloc_pd(device);
@@ -183,7 +173,7 @@ static void rds_ib_add_one(struct ib_device *device)
}
rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
- dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+ device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
rds_ibdev->max_8k_fmrs);
@@ -202,8 +192,6 @@ static void rds_ib_add_one(struct ib_device *device)
put_dev:
rds_ib_dev_put(rds_ibdev);
-free_attr:
- kfree(dev_attr);
}
/*
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 576f1825fc55..f4a9fff829e0 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -60,30 +60,20 @@ LIST_HEAD(iw_nodev_conns);
static void rds_iw_add_one(struct ib_device *device)
{
struct rds_iw_device *rds_iwdev;
- struct ib_device_attr *dev_attr;
/* Only handle iwarp devices */
if (device->node_type != RDMA_NODE_RNIC)
return;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- rdsdebug("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
if (!rds_iwdev)
- goto free_attr;
+ return;
spin_lock_init(&rds_iwdev->spinlock);
- rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
- rds_iwdev->max_wrs = dev_attr->max_qp_wr;
- rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
+ rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
+ rds_iwdev->max_wrs = device->attrs.max_qp_wr;
+ rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
rds_iwdev->dev = device;
rds_iwdev->pd = ib_alloc_pd(device);
@@ -111,8 +101,7 @@ static void rds_iw_add_one(struct ib_device *device)
list_add_tail(&rds_iwdev->list, &rds_iw_devices);
ib_set_client_data(device, &rds_iw_client, rds_iwdev);
-
- goto free_attr;
+ return;
err_mr:
if (rds_iwdev->mr)
@@ -121,8 +110,6 @@ err_pd:
ib_dealloc_pd(rds_iwdev->pd);
free_dev:
kfree(rds_iwdev);
-free_attr:
- kfree(dev_attr);
}
static void rds_iw_remove_one(struct ib_device *device, void *client_data)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 9d6ddbacd875..ad60299b088b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -37,7 +37,6 @@
#include <net/tcp.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#include <net/tcp.h>
#include "rds.h"
#include "tcp.h"
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index a8c05e18da58..03f26e3a6f48 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1094,17 +1094,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
return res;
}
-static bool rfkill_readable(struct rfkill_data *data)
-{
- bool r;
-
- mutex_lock(&data->mtx);
- r = !list_empty(&data->events);
- mutex_unlock(&data->mtx);
-
- return r;
-}
-
static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
@@ -1121,8 +1110,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
goto out;
}
mutex_unlock(&data->mtx);
+ /* since we re-check and it just compares pointers,
+ * using !list_empty() without locking isn't a problem
+ */
ret = wait_event_interruptible(data->read_wait,
- rfkill_readable(data));
+ !list_empty(&data->events));
mutex_lock(&data->mtx);
if (ret)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 06e7c4a37245..96066665e376 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -36,10 +36,9 @@ static void free_tcf(struct rcu_head *head)
kfree(p);
}
-static void tcf_hash_destroy(struct tc_action *a)
+static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *a)
{
struct tcf_common *p = a->priv;
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
spin_lock_bh(&hinfo->lock);
hlist_del(&p->tcfc_head);
@@ -68,8 +67,8 @@ int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
if (a->ops->cleanup)
a->ops->cleanup(a, bind);
- tcf_hash_destroy(a);
- ret = 1;
+ tcf_hash_destroy(a->hinfo, a);
+ ret = ACT_P_DELETED;
}
}
@@ -77,10 +76,9 @@ int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
}
EXPORT_SYMBOL(__tcf_hash_release);
-static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
- struct tc_action *a)
+static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+ struct netlink_callback *cb, struct tc_action *a)
{
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct hlist_head *head;
struct tcf_common *p;
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
@@ -126,9 +124,9 @@ nla_put_failure:
goto done;
}
-static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
+static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+ struct tc_action *a)
{
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct hlist_head *head;
struct hlist_node *n;
struct tcf_common *p;
@@ -163,18 +161,24 @@ nla_put_failure:
return ret;
}
-static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
- int type, struct tc_action *a)
+int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
{
+ struct tcf_hashinfo *hinfo = tn->hinfo;
+
+ a->hinfo = hinfo;
+
if (type == RTM_DELACTION) {
- return tcf_del_walker(skb, a);
+ return tcf_del_walker(hinfo, skb, a);
} else if (type == RTM_GETACTION) {
- return tcf_dump_walker(skb, cb, a);
+ return tcf_dump_walker(hinfo, skb, cb, a);
} else {
WARN(1, "tcf_generic_walker: unknown action %d\n", type);
return -EINVAL;
}
}
+EXPORT_SYMBOL(tcf_generic_walker);
static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
{
@@ -191,8 +195,9 @@ static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
return p;
}
-u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo)
+u32 tcf_hash_new_index(struct tc_action_net *tn)
{
+ struct tcf_hashinfo *hinfo = tn->hinfo;
u32 val = hinfo->index;
do {
@@ -205,28 +210,31 @@ u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo)
}
EXPORT_SYMBOL(tcf_hash_new_index);
-int tcf_hash_search(struct tc_action *a, u32 index)
+int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index)
{
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct tcf_hashinfo *hinfo = tn->hinfo;
struct tcf_common *p = tcf_hash_lookup(index, hinfo);
if (p) {
a->priv = p;
+ a->hinfo = hinfo;
return 1;
}
return 0;
}
EXPORT_SYMBOL(tcf_hash_search);
-int tcf_hash_check(u32 index, struct tc_action *a, int bind)
+int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
+ int bind)
{
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct tcf_hashinfo *hinfo = tn->hinfo;
struct tcf_common *p = NULL;
if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
if (bind)
p->tcfc_bindcnt++;
p->tcfc_refcnt++;
a->priv = p;
+ a->hinfo = hinfo;
return 1;
}
return 0;
@@ -243,11 +251,11 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
}
EXPORT_SYMBOL(tcf_hash_cleanup);
-int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
- int size, int bind, bool cpustats)
+int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+ struct tc_action *a, int size, int bind, bool cpustats)
{
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+ struct tcf_hashinfo *hinfo = tn->hinfo;
int err = -ENOMEM;
if (unlikely(!p))
@@ -272,7 +280,7 @@ err2:
}
spin_lock_init(&p->tcfc_lock);
INIT_HLIST_NODE(&p->tcfc_head);
- p->tcfc_index = index ? index : tcf_hash_new_index(hinfo);
+ p->tcfc_index = index ? index : tcf_hash_new_index(tn);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
if (est) {
@@ -286,14 +294,15 @@ err2:
}
a->priv = (void *) p;
+ a->hinfo = hinfo;
return 0;
}
EXPORT_SYMBOL(tcf_hash_create);
-void tcf_hash_insert(struct tc_action *a)
+void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
{
struct tcf_common *p = a->priv;
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct tcf_hashinfo *hinfo = tn->hinfo;
unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
spin_lock_bh(&hinfo->lock);
@@ -302,59 +311,78 @@ void tcf_hash_insert(struct tc_action *a)
}
EXPORT_SYMBOL(tcf_hash_insert);
+void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
+ struct tcf_hashinfo *hinfo)
+{
+ struct tc_action a = {
+ .ops = ops,
+ .hinfo = hinfo,
+ };
+ int i;
+
+ for (i = 0; i < hinfo->hmask + 1; i++) {
+ struct tcf_common *p;
+ struct hlist_node *n;
+
+ hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfc_head) {
+ int ret;
+
+ a.priv = p;
+ ret = __tcf_hash_release(&a, false, true);
+ if (ret == ACT_P_DELETED)
+ module_put(ops->owner);
+ else if (ret < 0)
+ return;
+ }
+ }
+ kfree(hinfo->htab);
+}
+EXPORT_SYMBOL(tcf_hashinfo_destroy);
+
static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
-int tcf_register_action(struct tc_action_ops *act, unsigned int mask)
+int tcf_register_action(struct tc_action_ops *act,
+ struct pernet_operations *ops)
{
struct tc_action_ops *a;
- int err;
+ int ret;
- /* Must supply act, dump and init */
- if (!act->act || !act->dump || !act->init)
+ if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
return -EINVAL;
- /* Supply defaults */
- if (!act->lookup)
- act->lookup = tcf_hash_search;
- if (!act->walk)
- act->walk = tcf_generic_walker;
-
- act->hinfo = kmalloc(sizeof(struct tcf_hashinfo), GFP_KERNEL);
- if (!act->hinfo)
- return -ENOMEM;
- err = tcf_hashinfo_init(act->hinfo, mask);
- if (err) {
- kfree(act->hinfo);
- return err;
- }
-
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
- tcf_hashinfo_destroy(act->hinfo);
- kfree(act->hinfo);
return -EEXIST;
}
}
list_add_tail(&act->head, &act_base);
write_unlock(&act_mod_lock);
+
+ ret = register_pernet_subsys(ops);
+ if (ret) {
+ tcf_unregister_action(act, ops);
+ return ret;
+ }
+
return 0;
}
EXPORT_SYMBOL(tcf_register_action);
-int tcf_unregister_action(struct tc_action_ops *act)
+int tcf_unregister_action(struct tc_action_ops *act,
+ struct pernet_operations *ops)
{
struct tc_action_ops *a;
int err = -ENOENT;
+ unregister_pernet_subsys(ops);
+
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (a == act) {
list_del(&act->head);
- tcf_hashinfo_destroy(act->hinfo);
- kfree(act->hinfo);
err = 0;
break;
}
@@ -721,8 +749,8 @@ static struct tc_action *create_a(int i)
return act;
}
-static struct tc_action *
-tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
+static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
+ struct nlmsghdr *n, u32 portid)
{
struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_action *a;
@@ -749,7 +777,7 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
if (a->ops == NULL) /* could happen in batch of actions */
goto err_free;
err = -ENOENT;
- if (a->ops->lookup(a, index) == 0)
+ if (a->ops->lookup(net, a, index) == 0)
goto err_mod;
module_put(a->ops->owner);
@@ -819,7 +847,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
if (nest == NULL)
goto out_module_put;
- err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a);
+ err = a.ops->walk(net, skb, &dcb, RTM_DELACTION, &a);
if (err < 0)
goto out_module_put;
if (err == 0)
@@ -897,7 +925,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_get_1(tb[i], n, portid);
+ act = tcf_action_get_1(net, tb[i], n, portid);
if (IS_ERR(act)) {
ret = PTR_ERR(act);
goto err;
@@ -1044,6 +1072,7 @@ find_dump_kind(const struct nlmsghdr *n)
static int
tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -1078,7 +1107,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto out_module_put;
- ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
+ ret = a_o->walk(net, skb, cb, RTM_GETACTION, &a);
if (ret < 0)
goto out_module_put;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0bc6f912f870..8c9f1f0459ab 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -33,6 +33,8 @@ struct tcf_bpf_cfg {
bool is_ebpf;
};
+static int bpf_net_id;
+
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
@@ -275,6 +277,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *act,
int replace, int bind)
{
+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
struct tcf_bpf_cfg cfg, old;
struct tc_act_bpf *parm;
@@ -294,8 +297,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
- if (!tcf_hash_check(parm->index, act, bind)) {
- ret = tcf_hash_create(parm->index, est, act,
+ if (!tcf_hash_check(tn, parm->index, act, bind)) {
+ ret = tcf_hash_create(tn, parm->index, est, act,
sizeof(*prog), bind, true);
if (ret < 0)
return ret;
@@ -344,7 +347,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
rcu_assign_pointer(prog->filter, cfg.filter);
if (res == ACT_P_CREATED) {
- tcf_hash_insert(act);
+ tcf_hash_insert(tn, act);
} else {
/* make sure the program being replaced is no longer executing */
synchronize_rcu();
@@ -367,6 +370,22 @@ static void tcf_bpf_cleanup(struct tc_action *act, int bind)
tcf_bpf_cfg_cleanup(&tmp);
}
+static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_bpf_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind = "bpf",
.type = TCA_ACT_BPF,
@@ -375,16 +394,39 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
.dump = tcf_bpf_dump,
.cleanup = tcf_bpf_cleanup,
.init = tcf_bpf_init,
+ .walk = tcf_bpf_walker,
+ .lookup = tcf_bpf_search,
+};
+
+static __net_init int bpf_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+ return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK);
+}
+
+static void __net_exit bpf_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations bpf_net_ops = {
+ .init = bpf_init_net,
+ .exit = bpf_exit_net,
+ .id = &bpf_net_id,
+ .size = sizeof(struct tc_action_net),
};
static int __init bpf_init_module(void)
{
- return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK);
+ return tcf_register_action(&act_bpf_ops, &bpf_net_ops);
}
static void __exit bpf_cleanup_module(void)
{
- tcf_unregister_action(&act_bpf_ops);
+ tcf_unregister_action(&act_bpf_ops, &bpf_net_ops);
}
module_init(bpf_init_module);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index bb41699c6c49..c0ed93ce2391 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -30,6 +30,8 @@
#define CONNMARK_TAB_MASK 3
+static int connmark_net_id;
+
static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -97,6 +99,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *a,
int ovr, int bind)
{
+ struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
struct tcf_connmark_info *ci;
struct tc_connmark *parm;
@@ -111,9 +114,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
- if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*ci),
- bind, false);
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*ci), bind, false);
if (ret)
return ret;
@@ -122,7 +125,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ci->net = net;
ci->zone = parm->zone;
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
ret = ACT_P_CREATED;
} else {
ci = to_connmark(a);
@@ -169,6 +172,22 @@ nla_put_failure:
return -1;
}
+static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_connmark_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.type = TCA_ACT_CONNMARK,
@@ -176,16 +195,39 @@ static struct tc_action_ops act_connmark_ops = {
.act = tcf_connmark,
.dump = tcf_connmark_dump,
.init = tcf_connmark_init,
+ .walk = tcf_connmark_walker,
+ .lookup = tcf_connmark_search,
+};
+
+static __net_init int connmark_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+ return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK);
+}
+
+static void __net_exit connmark_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations connmark_net_ops = {
+ .init = connmark_init_net,
+ .exit = connmark_exit_net,
+ .id = &connmark_net_id,
+ .size = sizeof(struct tc_action_net),
};
static int __init connmark_init_module(void)
{
- return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK);
+ return tcf_register_action(&act_connmark_ops, &connmark_net_ops);
}
static void __exit connmark_cleanup_module(void)
{
- tcf_unregister_action(&act_connmark_ops);
+ tcf_unregister_action(&act_connmark_ops, &connmark_net_ops);
}
module_init(connmark_init_module);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b07c535ba8e7..d22426cdebc0 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -42,9 +42,13 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
};
-static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
- struct tc_action *a, int ovr, int bind)
+static int csum_net_id;
+
+static int tcf_csum_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action *a, int ovr,
+ int bind)
{
+ struct tc_action_net *tn = net_generic(net, csum_net_id);
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tc_csum *parm;
struct tcf_csum *p;
@@ -61,9 +65,9 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_CSUM_PARMS]);
- if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
- bind, false);
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*p), bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -82,7 +86,7 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
return ret;
}
@@ -105,9 +109,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
int hl = ihl + jhl;
if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
- (skb_cloned(skb) &&
- !skb_clone_writable(skb, hl + ntkoff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ skb_try_make_writable(skb, hl + ntkoff))
return NULL;
else
return (void *)(skb_network_header(skb) + ihl);
@@ -365,9 +367,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
}
if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff))
goto fail;
ip_send_check(ip_hdr(skb));
@@ -559,6 +559,22 @@ nla_put_failure:
return -1;
}
+static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_csum_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
.type = TCA_ACT_CSUM,
@@ -566,6 +582,29 @@ static struct tc_action_ops act_csum_ops = {
.act = tcf_csum,
.dump = tcf_csum_dump,
.init = tcf_csum_init,
+ .walk = tcf_csum_walker,
+ .lookup = tcf_csum_search,
+};
+
+static __net_init int csum_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+ return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK);
+}
+
+static void __net_exit csum_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations csum_net_ops = {
+ .init = csum_init_net,
+ .exit = csum_exit_net,
+ .id = &csum_net_id,
+ .size = sizeof(struct tc_action_net),
};
MODULE_DESCRIPTION("Checksum updating actions");
@@ -573,12 +612,12 @@ MODULE_LICENSE("GPL");
static int __init csum_init_module(void)
{
- return tcf_register_action(&act_csum_ops, CSUM_TAB_MASK);
+ return tcf_register_action(&act_csum_ops, &csum_net_ops);
}
static void __exit csum_cleanup_module(void)
{
- tcf_unregister_action(&act_csum_ops);
+ tcf_unregister_action(&act_csum_ops, &csum_net_ops);
}
module_init(csum_init_module);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 5c1b05170736..887fc1f209ff 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -25,6 +25,8 @@
#define GACT_TAB_MASK 15
+static int gact_net_id;
+
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
{
@@ -57,6 +59,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *a,
int ovr, int bind)
{
+ struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
struct tc_gact *parm;
struct tcf_gact *gact;
@@ -88,9 +91,9 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
- if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*gact),
- bind, true);
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*gact), bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -118,7 +121,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
return ret;
}
@@ -183,6 +186,22 @@ nla_put_failure:
return -1;
}
+static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_gact_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
.type = TCA_ACT_GACT,
@@ -190,6 +209,29 @@ static struct tc_action_ops act_gact_ops = {
.act = tcf_gact,
.dump = tcf_gact_dump,
.init = tcf_gact_init,
+ .walk = tcf_gact_walker,
+ .lookup = tcf_gact_search,
+};
+
+static __net_init int gact_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+ return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK);
+}
+
+static void __net_exit gact_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations gact_net_ops = {
+ .init = gact_init_net,
+ .exit = gact_exit_net,
+ .id = &gact_net_id,
+ .size = sizeof(struct tc_action_net),
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
@@ -203,12 +245,13 @@ static int __init gact_init_module(void)
#else
pr_info("GACT probability NOT on\n");
#endif
- return tcf_register_action(&act_gact_ops, GACT_TAB_MASK);
+
+ return tcf_register_action(&act_gact_ops, &gact_net_ops);
}
static void __exit gact_cleanup_module(void)
{
- tcf_unregister_action(&act_gact_ops);
+ tcf_unregister_action(&act_gact_ops, &gact_net_ops);
}
module_init(gact_init_module);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d05869646515..89c41a1f3589 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,6 +30,10 @@
#define IPT_TAB_MASK 15
+static int ipt_net_id;
+
+static int xt_net_id;
+
static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
{
struct xt_tgchk_param par;
@@ -83,8 +87,9 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
[TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
};
-static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action *a, int ovr, int bind)
+static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
+ struct nlattr *est, struct tc_action *a, int ovr,
+ int bind)
{
struct nlattr *tb[TCA_IPT_MAX + 1];
struct tcf_ipt *ipt;
@@ -113,8 +118,9 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
if (tb[TCA_IPT_INDEX] != NULL)
index = nla_get_u32(tb[TCA_IPT_INDEX]);
- if (!tcf_hash_check(index, a, bind) ) {
- ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false);
+ if (!tcf_hash_check(tn, index, a, bind)) {
+ ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind,
+ false);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -157,7 +163,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
ipt->tcfi_hook = hook;
spin_unlock_bh(&ipt->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
return ret;
err3:
@@ -170,6 +176,24 @@ err1:
return err;
}
+static int tcf_ipt_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action *a, int ovr,
+ int bind)
+{
+ struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+ return __tcf_ipt_init(tn, nla, est, a, ovr, bind);
+}
+
+static int tcf_xt_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action *a, int ovr,
+ int bind)
+{
+ struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+ return __tcf_ipt_init(tn, nla, est, a, ovr, bind);
+}
+
static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -260,6 +284,22 @@ nla_put_failure:
return -1;
}
+static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_ipt_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
.type = TCA_ACT_IPT,
@@ -268,8 +308,47 @@ static struct tc_action_ops act_ipt_ops = {
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
+ .walk = tcf_ipt_walker,
+ .lookup = tcf_ipt_search,
+};
+
+static __net_init int ipt_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+ return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK);
+}
+
+static void __net_exit ipt_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations ipt_net_ops = {
+ .init = ipt_init_net,
+ .exit = ipt_exit_net,
+ .id = &ipt_net_id,
+ .size = sizeof(struct tc_action_net),
};
+static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_xt_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
.type = TCA_ACT_XT,
@@ -277,7 +356,30 @@ static struct tc_action_ops act_xt_ops = {
.act = tcf_ipt,
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_release,
- .init = tcf_ipt_init,
+ .init = tcf_xt_init,
+ .walk = tcf_xt_walker,
+ .lookup = tcf_xt_search,
+};
+
+static __net_init int xt_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+ return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK);
+}
+
+static void __net_exit xt_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations xt_net_ops = {
+ .init = xt_init_net,
+ .exit = xt_exit_net,
+ .id = &xt_net_id,
+ .size = sizeof(struct tc_action_net),
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
@@ -289,12 +391,13 @@ static int __init ipt_init_module(void)
{
int ret1, ret2;
- ret1 = tcf_register_action(&act_xt_ops, IPT_TAB_MASK);
+ ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops);
if (ret1 < 0)
- printk("Failed to load xt action\n");
- ret2 = tcf_register_action(&act_ipt_ops, IPT_TAB_MASK);
+ pr_err("Failed to load xt action\n");
+
+ ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops);
if (ret2 < 0)
- printk("Failed to load ipt action\n");
+ pr_err("Failed to load ipt action\n");
if (ret1 < 0 && ret2 < 0) {
return ret1;
@@ -304,8 +407,8 @@ static int __init ipt_init_module(void)
static void __exit ipt_cleanup_module(void)
{
- tcf_unregister_action(&act_xt_ops);
- tcf_unregister_action(&act_ipt_ops);
+ tcf_unregister_action(&act_ipt_ops, &ipt_net_ops);
+ tcf_unregister_action(&act_xt_ops, &xt_net_ops);
}
module_init(ipt_init_module);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 32fcdecdb9e2..6b284d991e0b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -50,10 +50,13 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
[TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
};
+static int mirred_net_id;
+
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *a, int ovr,
int bind)
{
+ struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
struct tc_mirred *parm;
struct tcf_mirred *m;
@@ -96,11 +99,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev = NULL;
}
- if (!tcf_hash_check(parm->index, a, bind)) {
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
if (dev == NULL)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*m),
- bind, true);
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*m), bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -130,7 +133,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_lock_bh(&mirred_list_lock);
list_add(&m->tcfm_list, &mirred_list);
spin_unlock_bh(&mirred_list_lock);
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
}
return ret;
@@ -221,6 +224,22 @@ nla_put_failure:
return -1;
}
+static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_mirred_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static int mirred_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -257,6 +276,29 @@ static struct tc_action_ops act_mirred_ops = {
.dump = tcf_mirred_dump,
.cleanup = tcf_mirred_release,
.init = tcf_mirred_init,
+ .walk = tcf_mirred_walker,
+ .lookup = tcf_mirred_search,
+};
+
+static __net_init int mirred_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+ return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK);
+}
+
+static void __net_exit mirred_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations mirred_net_ops = {
+ .init = mirred_init_net,
+ .exit = mirred_exit_net,
+ .id = &mirred_net_id,
+ .size = sizeof(struct tc_action_net),
};
MODULE_AUTHOR("Jamal Hadi Salim(2002)");
@@ -270,12 +312,12 @@ static int __init mirred_init_module(void)
return err;
pr_info("Mirror/redirect action on\n");
- return tcf_register_action(&act_mirred_ops, MIRRED_TAB_MASK);
+ return tcf_register_action(&act_mirred_ops, &mirred_net_ops);
}
static void __exit mirred_cleanup_module(void)
{
- tcf_unregister_action(&act_mirred_ops);
+ tcf_unregister_action(&act_mirred_ops, &mirred_net_ops);
unregister_netdevice_notifier(&mirred_device_notifier);
}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index b7c4ead8b5a8..0f65cdfbfb1d 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -31,6 +31,8 @@
#define NAT_TAB_MASK 15
+static int nat_net_id;
+
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
[TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
};
@@ -38,6 +40,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_action *a, int ovr, int bind)
{
+ struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tc_nat *parm;
int ret = 0, err;
@@ -54,9 +57,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_NAT_PARMS]);
- if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
- bind, false);
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*p), bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -79,7 +82,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
return ret;
}
@@ -126,9 +129,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
addr = iph->daddr;
if (!((old_addr ^ addr) & mask)) {
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, sizeof(*iph) + noff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_try_make_writable(skb, sizeof(*iph) + noff))
goto drop;
new_addr &= mask;
@@ -156,9 +157,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
struct tcphdr *tcph;
if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
- (skb_cloned(skb) &&
- !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
goto drop;
tcph = (void *)(skb_network_header(skb) + ihl);
@@ -171,9 +170,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
struct udphdr *udph;
if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
- (skb_cloned(skb) &&
- !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
goto drop;
udph = (void *)(skb_network_header(skb) + ihl);
@@ -213,10 +210,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
if ((old_addr ^ addr) & mask)
break;
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, ihl + sizeof(*icmph) +
- sizeof(*iph) + noff) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
+ sizeof(*iph) + noff))
goto drop;
icmph = (void *)(skb_network_header(skb) + ihl);
@@ -282,6 +277,22 @@ nla_put_failure:
return -1;
}
+static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_nat_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.type = TCA_ACT_NAT,
@@ -289,6 +300,29 @@ static struct tc_action_ops act_nat_ops = {
.act = tcf_nat,
.dump = tcf_nat_dump,
.init = tcf_nat_init,
+ .walk = tcf_nat_walker,
+ .lookup = tcf_nat_search,
+};
+
+static __net_init int nat_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+ return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK);
+}
+
+static void __net_exit nat_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations nat_net_ops = {
+ .init = nat_init_net,
+ .exit = nat_exit_net,
+ .id = &nat_net_id,
+ .size = sizeof(struct tc_action_net),
};
MODULE_DESCRIPTION("Stateless NAT actions");
@@ -296,12 +330,12 @@ MODULE_LICENSE("GPL");
static int __init nat_init_module(void)
{
- return tcf_register_action(&act_nat_ops, NAT_TAB_MASK);
+ return tcf_register_action(&act_nat_ops, &nat_net_ops);
}
static void __exit nat_cleanup_module(void)
{
- tcf_unregister_action(&act_nat_ops);
+ tcf_unregister_action(&act_nat_ops, &nat_net_ops);
}
module_init(nat_init_module);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index e38a7701f154..429c3ab65142 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -25,6 +25,8 @@
#define PEDIT_TAB_MASK 15
+static int pedit_net_id;
+
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
};
@@ -33,6 +35,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *a,
int ovr, int bind)
{
+ struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tc_pedit *parm;
int ret = 0, err;
@@ -54,11 +57,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
return -EINVAL;
- if (!tcf_hash_check(parm->index, a, bind)) {
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
- bind, false);
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*p), bind, false);
if (ret)
return ret;
p = to_pedit(a);
@@ -93,7 +96,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
memcpy(p->tcfp_keys, parm->keys, ksize);
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
return ret;
}
@@ -211,6 +214,22 @@ nla_put_failure:
return -1;
}
+static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_pedit_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
.type = TCA_ACT_PEDIT,
@@ -219,6 +238,29 @@ static struct tc_action_ops act_pedit_ops = {
.dump = tcf_pedit_dump,
.cleanup = tcf_pedit_cleanup,
.init = tcf_pedit_init,
+ .walk = tcf_pedit_walker,
+ .lookup = tcf_pedit_search,
+};
+
+static __net_init int pedit_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+ return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK);
+}
+
+static void __net_exit pedit_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations pedit_net_ops = {
+ .init = pedit_init_net,
+ .exit = pedit_exit_net,
+ .id = &pedit_net_id,
+ .size = sizeof(struct tc_action_net),
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
@@ -227,12 +269,12 @@ MODULE_LICENSE("GPL");
static int __init pedit_init_module(void)
{
- return tcf_register_action(&act_pedit_ops, PEDIT_TAB_MASK);
+ return tcf_register_action(&act_pedit_ops, &pedit_net_ops);
}
static void __exit pedit_cleanup_module(void)
{
- tcf_unregister_action(&act_pedit_ops);
+ tcf_unregister_action(&act_pedit_ops, &pedit_net_ops);
}
module_init(pedit_init_module);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 9a1c42a43f92..330f14e302e8 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -55,10 +55,14 @@ struct tc_police_compat {
/* Each policer is serialized by its individual spinlock */
-static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
- int type, struct tc_action *a)
+static int police_net_id;
+
+static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
{
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct tc_action_net *tn = net_generic(net, police_net_id);
+ struct tcf_hashinfo *hinfo = tn->hinfo;
struct hlist_head *head;
struct tcf_common *p;
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
@@ -121,7 +125,8 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
struct tc_police *parm;
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct tc_action_net *tn = net_generic(net, police_net_id);
+ struct tcf_hashinfo *hinfo = tn->hinfo;
int size;
if (nla == NULL)
@@ -139,7 +144,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_POLICE_TBF]);
if (parm->index) {
- if (tcf_hash_search(a, parm->index)) {
+ if (tcf_hash_search(tn, a, parm->index)) {
police = to_police(a->priv);
if (bind) {
police->tcf_bindcnt += 1;
@@ -233,7 +238,7 @@ override:
police->tcfp_t_c = ktime_get_ns();
police->tcf_index = parm->index ? parm->index :
- tcf_hash_new_index(hinfo);
+ tcf_hash_new_index(tn);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
spin_lock_bh(&hinfo->lock);
hlist_add_head(&police->tcf_head, &hinfo->htab[h]);
@@ -342,6 +347,13 @@ nla_put_failure:
return -1;
}
+static int tcf_police_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, police_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
MODULE_AUTHOR("Alexey Kuznetsov");
MODULE_DESCRIPTION("Policing actions");
MODULE_LICENSE("GPL");
@@ -353,19 +365,41 @@ static struct tc_action_ops act_police_ops = {
.act = tcf_act_police,
.dump = tcf_act_police_dump,
.init = tcf_act_police_locate,
- .walk = tcf_act_police_walker
+ .walk = tcf_act_police_walker,
+ .lookup = tcf_police_search,
+};
+
+static __net_init int police_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, police_net_id);
+
+ return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK);
+}
+
+static void __net_exit police_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, police_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations police_net_ops = {
+ .init = police_init_net,
+ .exit = police_exit_net,
+ .id = &police_net_id,
+ .size = sizeof(struct tc_action_net),
};
static int __init
police_init_module(void)
{
- return tcf_register_action(&act_police_ops, POL_TAB_MASK);
+ return tcf_register_action(&act_police_ops, &police_net_ops);
}
static void __exit
police_cleanup_module(void)
{
- tcf_unregister_action(&act_police_ops);
+ tcf_unregister_action(&act_police_ops, &police_net_ops);
}
module_init(police_init_module);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index d6b708d6afdf..75b2be13fbcc 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -26,6 +26,8 @@
#define SIMP_TAB_MASK 7
+static int simp_net_id;
+
#define SIMP_MAX_DATA 32
static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
@@ -80,6 +82,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *a,
int ovr, int bind)
{
+ struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tc_defact *parm;
struct tcf_defact *d;
@@ -102,9 +105,9 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_DEF_PARMS]);
defdata = nla_data(tb[TCA_DEF_DATA]);
- if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
- bind, false);
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*d), bind, false);
if (ret)
return ret;
@@ -129,7 +132,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
return ret;
}
@@ -161,6 +164,22 @@ nla_put_failure:
return -1;
}
+static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_simp_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
.type = TCA_ACT_SIMP,
@@ -169,6 +188,29 @@ static struct tc_action_ops act_simp_ops = {
.dump = tcf_simp_dump,
.cleanup = tcf_simp_release,
.init = tcf_simp_init,
+ .walk = tcf_simp_walker,
+ .lookup = tcf_simp_search,
+};
+
+static __net_init int simp_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+ return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK);
+}
+
+static void __net_exit simp_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations simp_net_ops = {
+ .init = simp_init_net,
+ .exit = simp_exit_net,
+ .id = &simp_net_id,
+ .size = sizeof(struct tc_action_net),
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
@@ -177,8 +219,7 @@ MODULE_LICENSE("GPL");
static int __init simp_init_module(void)
{
- int ret;
- ret = tcf_register_action(&act_simp_ops, SIMP_TAB_MASK);
+ int ret = tcf_register_action(&act_simp_ops, &simp_net_ops);
if (!ret)
pr_info("Simple TC action Loaded\n");
return ret;
@@ -186,7 +227,7 @@ static int __init simp_init_module(void)
static void __exit simp_cleanup_module(void)
{
- tcf_unregister_action(&act_simp_ops);
+ tcf_unregister_action(&act_simp_ops, &simp_net_ops);
}
module_init(simp_init_module);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6751b5f8c046..cfcdbdc00c9b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -29,6 +29,8 @@
#define SKBEDIT_TAB_MASK 15
+static int skbedit_net_id;
+
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -61,6 +63,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *a,
int ovr, int bind)
{
+ struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tc_skbedit *parm;
struct tcf_skbedit *d;
@@ -98,9 +101,9 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
- if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
- bind, false);
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*d), bind, false);
if (ret)
return ret;
@@ -130,7 +133,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&d->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
return ret;
}
@@ -173,6 +176,22 @@ nla_put_failure:
return -1;
}
+static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_skbedit_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
.type = TCA_ACT_SKBEDIT,
@@ -180,6 +199,29 @@ static struct tc_action_ops act_skbedit_ops = {
.act = tcf_skbedit,
.dump = tcf_skbedit_dump,
.init = tcf_skbedit_init,
+ .walk = tcf_skbedit_walker,
+ .lookup = tcf_skbedit_search,
+};
+
+static __net_init int skbedit_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+ return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK);
+}
+
+static void __net_exit skbedit_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations skbedit_net_ops = {
+ .init = skbedit_init_net,
+ .exit = skbedit_exit_net,
+ .id = &skbedit_net_id,
+ .size = sizeof(struct tc_action_net),
};
MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
@@ -188,12 +230,12 @@ MODULE_LICENSE("GPL");
static int __init skbedit_init_module(void)
{
- return tcf_register_action(&act_skbedit_ops, SKBEDIT_TAB_MASK);
+ return tcf_register_action(&act_skbedit_ops, &skbedit_net_ops);
}
static void __exit skbedit_cleanup_module(void)
{
- tcf_unregister_action(&act_skbedit_ops);
+ tcf_unregister_action(&act_skbedit_ops, &skbedit_net_ops);
}
module_init(skbedit_init_module);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 796785e0bf96..bab8ae0cefc0 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -21,6 +21,8 @@
#define VLAN_TAB_MASK 15
+static int vlan_net_id;
+
static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -68,6 +70,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *a,
int ovr, int bind)
{
+ struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
struct tc_vlan *parm;
struct tcf_vlan *v;
@@ -115,9 +118,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
}
action = parm->v_action;
- if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*v),
- bind, false);
+ if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ sizeof(*v), bind, false);
if (ret)
return ret;
@@ -143,7 +146,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&v->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(tn, a);
return ret;
}
@@ -181,6 +184,22 @@ nla_put_failure:
return -1;
}
+static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ struct tc_action *a)
+{
+ struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, a);
+}
+
+static int tcf_vlan_search(struct net *net, struct tc_action *a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
static struct tc_action_ops act_vlan_ops = {
.kind = "vlan",
.type = TCA_ACT_VLAN,
@@ -188,16 +207,39 @@ static struct tc_action_ops act_vlan_ops = {
.act = tcf_vlan,
.dump = tcf_vlan_dump,
.init = tcf_vlan_init,
+ .walk = tcf_vlan_walker,
+ .lookup = tcf_vlan_search,
+};
+
+static __net_init int vlan_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+ return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK);
+}
+
+static void __net_exit vlan_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations vlan_net_ops = {
+ .init = vlan_init_net,
+ .exit = vlan_exit_net,
+ .id = &vlan_net_id,
+ .size = sizeof(struct tc_action_net),
};
static int __init vlan_init_module(void)
{
- return tcf_register_action(&act_vlan_ops, VLAN_TAB_MASK);
+ return tcf_register_action(&act_vlan_ops, &vlan_net_ops);
}
static void __exit vlan_cleanup_module(void)
{
- tcf_unregister_action(&act_vlan_ops);
+ tcf_unregister_action(&act_vlan_ops, &vlan_net_ops);
}
module_init(vlan_init_module);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4fbb67430ce4..563cdad76448 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -43,6 +43,7 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <linux/netdevice.h>
struct tc_u_knode {
struct tc_u_knode __rcu *next;
@@ -58,6 +59,7 @@ struct tc_u_knode {
#ifdef CONFIG_CLS_U32_PERF
struct tc_u32_pcnt __percpu *pf;
#endif
+ u32 flags;
#ifdef CONFIG_CLS_U32_MARK
u32 val;
u32 mask;
@@ -424,6 +426,97 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
+static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct tc_cls_u32_offload u32_offload = {0};
+ struct tc_to_netdev offload;
+
+ offload.type = TC_SETUP_CLSU32;
+ offload.cls_u32 = &u32_offload;
+
+ if (tc_should_offload(dev, 0)) {
+ offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
+ offload.cls_u32->knode.handle = handle;
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->protocol, &offload);
+ }
+}
+
+static void u32_replace_hw_hnode(struct tcf_proto *tp,
+ struct tc_u_hnode *h,
+ u32 flags)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct tc_cls_u32_offload u32_offload = {0};
+ struct tc_to_netdev offload;
+
+ offload.type = TC_SETUP_CLSU32;
+ offload.cls_u32 = &u32_offload;
+
+ if (tc_should_offload(dev, flags)) {
+ offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
+ offload.cls_u32->hnode.divisor = h->divisor;
+ offload.cls_u32->hnode.handle = h->handle;
+ offload.cls_u32->hnode.prio = h->prio;
+
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->protocol, &offload);
+ }
+}
+
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct tc_cls_u32_offload u32_offload = {0};
+ struct tc_to_netdev offload;
+
+ offload.type = TC_SETUP_CLSU32;
+ offload.cls_u32 = &u32_offload;
+
+ if (tc_should_offload(dev, 0)) {
+ offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
+ offload.cls_u32->hnode.divisor = h->divisor;
+ offload.cls_u32->hnode.handle = h->handle;
+ offload.cls_u32->hnode.prio = h->prio;
+
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->protocol, &offload);
+ }
+}
+
+static void u32_replace_hw_knode(struct tcf_proto *tp,
+ struct tc_u_knode *n,
+ u32 flags)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct tc_cls_u32_offload u32_offload = {0};
+ struct tc_to_netdev offload;
+
+ offload.type = TC_SETUP_CLSU32;
+ offload.cls_u32 = &u32_offload;
+
+ if (tc_should_offload(dev, flags)) {
+ offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
+ offload.cls_u32->knode.handle = n->handle;
+ offload.cls_u32->knode.fshift = n->fshift;
+#ifdef CONFIG_CLS_U32_MARK
+ offload.cls_u32->knode.val = n->val;
+ offload.cls_u32->knode.mask = n->mask;
+#else
+ offload.cls_u32->knode.val = 0;
+ offload.cls_u32->knode.mask = 0;
+#endif
+ offload.cls_u32->knode.sel = &n->sel;
+ offload.cls_u32->knode.exts = &n->exts;
+ if (n->ht_down)
+ offload.cls_u32->knode.link_handle = n->ht_down->handle;
+
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->protocol, &offload);
+ }
+}
+
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
struct tc_u_knode *n;
@@ -434,6 +527,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
RCU_INIT_POINTER(ht->ht[h],
rtnl_dereference(n->next));
tcf_unbind_filter(tp, &n->res);
+ u32_remove_hw_knode(tp, n->handle);
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
}
}
@@ -454,6 +548,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
phn;
hn = &phn->next, phn = rtnl_dereference(*hn)) {
if (phn == ht) {
+ u32_clear_hw_hnode(tp, ht);
RCU_INIT_POINTER(*hn, ht->next);
kfree_rcu(ht, rcu);
return 0;
@@ -540,8 +635,10 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
if (ht == NULL)
return 0;
- if (TC_U32_KEY(ht->handle))
+ if (TC_U32_KEY(ht->handle)) {
+ u32_remove_hw_knode(tp, ht->handle);
return u32_delete_key(tp, (struct tc_u_knode *)ht);
+ }
if (root_ht == ht)
return -EINVAL;
@@ -587,6 +684,7 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
[TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) },
[TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
[TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) },
+ [TCA_U32_FLAGS] = { .type = NLA_U32 },
};
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
@@ -694,6 +792,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
#endif
new->fshift = n->fshift;
new->res = n->res;
+ new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, n->ht_down);
/* bump reference count as long as we hold pointer to structure */
@@ -733,7 +832,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tc_u32_sel *s;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_U32_MAX + 1];
- u32 htid;
+ u32 htid, flags = 0;
int err;
#ifdef CONFIG_CLS_U32_PERF
size_t size;
@@ -746,6 +845,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
+ if (tb[TCA_U32_FLAGS])
+ flags = nla_get_u32(tb[TCA_U32_FLAGS]);
+
n = (struct tc_u_knode *)*arg;
if (n) {
struct tc_u_knode *new;
@@ -753,6 +855,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (TC_U32_KEY(n->handle) == 0)
return -EINVAL;
+ if (n->flags != flags)
+ return -EINVAL;
+
new = u32_init_knode(tp, n);
if (!new)
return -ENOMEM;
@@ -769,6 +874,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
call_rcu(&n->rcu, u32_delete_key_rcu);
+ u32_replace_hw_knode(tp, new, flags);
return 0;
}
@@ -795,6 +901,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, ht);
*arg = (unsigned long)ht;
+
+ u32_replace_hw_hnode(tp, ht, flags);
return 0;
}
@@ -845,6 +953,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
+ n->flags = flags;
tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
n->tp = tp;
@@ -877,7 +986,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(n->next, pins);
rcu_assign_pointer(*ins, n);
-
+ u32_replace_hw_knode(tp, n, flags);
*arg = (unsigned long)n;
return 0;
}
@@ -982,6 +1091,9 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
goto nla_put_failure;
+ if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
+ goto nla_put_failure;
+
#ifdef CONFIG_CLS_U32_MARK
if ((n->val || n->mask)) {
struct tc_u32_mark mark = {.val = n->val,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b5c2cf2aa6d4..3b180ff72f79 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
return 0;
}
-void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
+ unsigned int len)
{
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
int drops;
- if (n == 0)
+ if (n == 0 && len == 0)
return;
drops = max_t(int, n, 0);
rcu_read_lock();
@@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
cops->put(sch, cl);
}
sch->q.qlen -= n;
+ sch->qstats.backlog -= len;
__qdisc_qstats_drop(sch, drops);
}
rcu_read_unlock();
}
-EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
+EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
@@ -1841,7 +1843,7 @@ reclassify:
return err;
}
- return -1;
+ return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
if (unlikely(limit++ >= MAX_REC_LOOP)) {
@@ -1852,6 +1854,7 @@ reset:
}
tp = old_tp;
+ protocol = tc_skb_protocol(skb);
goto reclassify;
#endif
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index c538d9e4a8f6..baafddf229ce 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new->reshape_fail = cbq_reshape_fail;
#endif
}
- sch_tree_lock(sch);
- *old = cl->q;
- cl->q = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->q);
return 0;
}
@@ -1914,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
- unsigned int qlen;
+ unsigned int qlen, backlog;
if (cl->filters || cl->children || cl == &q->link)
return -EBUSY;
@@ -1922,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
qlen = cl->q->q.qlen;
+ backlog = cl->q->qstats.backlog;
qdisc_reset(cl->q);
- qdisc_tree_decrease_qlen(cl->q, qlen);
+ qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
if (cl->next_alive)
cbq_deactivate_class(cl);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 5ffb8b8337c7..0a08c860eee4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
choke_zap_tail_holes(q);
qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_drop(skb, sch);
- qdisc_tree_decrease_qlen(sch, 1);
--sch->q.qlen;
}
@@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
old = q->tab;
if (old) {
unsigned int oqlen = sch->q.qlen, tail = 0;
+ unsigned dropped = 0;
while (q->head != q->tail) {
struct sk_buff *skb = q->tab[q->head];
@@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
ntab[tail++] = skb;
continue;
}
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen;
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
q->head = 0;
q->tail = tail;
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 535007d5f0b5..9b7e2980ee5c 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
- /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+ /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
*/
if (q->stats.drop_count && sch->q.qlen) {
- qdisc_tree_decrease_qlen(sch, q->stats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
q->stats.drop_count = 0;
+ q->stats.drop_len = 0;
}
if (skb)
qdisc_bstats_update(sch, skb);
@@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
{
struct codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CODEL_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
int err;
if (!opt)
@@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index f26bdea875c1..a63e879e8975 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
static void drr_purge_queue(struct drr_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
@@ -226,11 +227,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- drr_purge_queue(cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
@@ -403,6 +400,8 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
if (len <= cl->deficit) {
cl->deficit -= len;
skb = qdisc_dequeue_peeked(cl->qdisc);
+ if (unlikely(skb == NULL))
+ goto out;
if (cl->qdisc->q.qlen == 0)
list_del(&cl->alist);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index f357f34d02d2..d0dff0cd8186 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- *old = p->q;
- p->q = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &p->q);
return 0;
}
@@ -264,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -286,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
return NULL;
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
index = skb->tc_index & (p->indices - 1);
@@ -401,6 +397,7 @@ static void dsmark_reset(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
qdisc_reset(p->q);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 109b2322778f..3c6a47d66a04 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
struct fq_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_MAX + 1];
int err, drop_count = 0;
+ unsigned drop_len = 0;
u32 fq_log;
if (!opt)
@@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!skb)
break;
+ drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
drop_count++;
}
- qdisc_tree_decrease_qlen(sch, drop_count);
+ qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
sch_tree_unlock(sch);
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 4c834e93dafb..d3fc8f9dd3d4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
- unsigned int idx;
+ unsigned int idx, prev_backlog;
struct fq_codel_flow *flow;
int uninitialized_var(ret);
@@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
+ prev_backlog = sch->qstats.backlog;
q->drop_overlimit++;
/* Return Congestion Notification only if we dropped a packet
* from this flow.
@@ -211,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
return NET_XMIT_SUCCESS;
}
@@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
struct fq_codel_flow *flow;
struct list_head *head;
u32 prev_drop_count, prev_ecn_mark;
+ unsigned int prev_backlog;
begin:
head = &q->new_flows;
@@ -259,6 +261,7 @@ begin:
prev_drop_count = q->cstats.drop_count;
prev_ecn_mark = q->cstats.ecn_mark;
+ prev_backlog = sch->qstats.backlog;
skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
dequeue);
@@ -276,12 +279,14 @@ begin:
}
qdisc_bstats_update(sch, skb);
flow->deficit -= qdisc_pkt_len(skb);
- /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+ /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
*/
if (q->cstats.drop_count && sch->q.qlen) {
- qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
+ q->cstats.drop_len);
q->cstats.drop_count = 0;
+ q->cstats.drop_len = 0;
}
return skb;
}
@@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_codel_dequeue(sch);
+ q->cstats.drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
q->cstats.drop_count++;
}
- qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
q->cstats.drop_count = 0;
+ q->cstats.drop_len = 0;
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b7ebe2c87586..d783d7cc3348 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,9 +895,10 @@ static void
hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static void
@@ -1215,11 +1216,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- hfsc_purge_queue(sch, cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 86b04e31e60b..13d6f83ec491 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct hhf_sched_data *q = qdisc_priv(sch);
enum wdrr_bucket_idx idx;
struct wdrr_bucket *bucket;
+ unsigned int prev_backlog;
idx = hhf_classify(skb, sch);
@@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
+ prev_backlog = sch->qstats.backlog;
q->drop_overlimit++;
/* Return Congestion Notification only if we dropped a packet from this
* bucket.
@@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this. */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
return NET_XMIT_SUCCESS;
}
@@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
{
struct hhf_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_HHF_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, prev_backlog;
int err;
u64 non_hh_quantum;
u32 new_quantum = q->quantum;
@@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
}
qlen = sch->q.qlen;
+ prev_backlog = sch->qstats.backlog;
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = hhf_dequeue(sch);
kfree_skb(skb);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
+ prev_backlog - sch->qstats.backlog);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 15ccd7f8fb2a..87b02ed3d5f2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
htb_activate(q, cl);
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
@@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
ok:
qdisc_bstats_update(sch, skb);
qdisc_unthrottled(sch);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
return skb;
}
@@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch)
unsigned int len;
if (cl->un.leaf.q->ops->drop &&
(len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+ sch->qstats.backlog -= len;
sch->q.qlen--;
if (!cl->un.leaf.q->q.qlen)
htb_deactivate(q, cl);
@@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch)
}
cl->prio_activity = 0;
cl->cmode = HTB_CAN_SEND;
-
}
}
qdisc_watchdog_cancel(&q->watchdog);
__skb_queue_purge(&q->direct_queue);
sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
memset(q->row_mask, 0, sizeof(q->row_mask));
for (i = 0; i < TC_HTB_NUMPRIO; i++)
@@ -1163,14 +1166,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
cl->common.classid)) == NULL)
return -ENOBUFS;
- sch_tree_lock(sch);
- *old = cl->un.leaf.q;
- cl->un.leaf.q = new;
- if (*old != NULL) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->un.leaf.q);
return 0;
}
@@ -1272,7 +1268,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- unsigned int qlen;
struct Qdisc *new_q = NULL;
int last_child = 0;
@@ -1292,9 +1287,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
if (!cl->level) {
- qlen = cl->un.leaf.q->q.qlen;
+ unsigned int qlen = cl->un.leaf.q->q.qlen;
+ unsigned int backlog = cl->un.leaf.q->qstats.backlog;
+
qdisc_reset(cl->un.leaf.q);
- qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
+ qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
}
/* delete from hash and active; remainder in destroy_class */
@@ -1428,10 +1425,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
sch_tree_lock(sch);
if (parent && !parent->level) {
unsigned int qlen = parent->un.leaf.q->q.qlen;
+ unsigned int backlog = parent->un.leaf.q->qstats.backlog;
/* turn parent into inner node */
qdisc_reset(parent->un.leaf.q);
- qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
+ qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
qdisc_destroy(parent->un.leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index ad70ecf57ce7..f9947d1f4952 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -28,6 +28,7 @@ static void mqprio_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
+ struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO};
unsigned int ntx;
if (priv->qdiscs) {
@@ -39,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch)
}
if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
- dev->netdev_ops->ndo_setup_tc(dev, 0);
+ dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
else
netdev_set_num_tc(dev, 0);
}
@@ -140,8 +141,11 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping
*/
if (qopt->hw) {
+ struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO,
+ .tc = qopt->num_tc};
+
priv->hw_owned = 1;
- err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
+ err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
if (err)
goto err;
} else {
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 4e904ca0af9d..bcdd54bb101c 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
if (q->queues[i] != &noop_qdisc) {
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
- qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_tree_reduce_backlog(child, child->q.qlen,
+ child->qstats.backlog);
qdisc_destroy(child);
}
}
@@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
q->queues[i] = child;
if (old != &noop_qdisc) {
- qdisc_tree_decrease_qlen(old,
- old->q.qlen);
+ qdisc_tree_reduce_backlog(old,
+ old->q.qlen,
+ old->qstats.backlog);
qdisc_destroy(old);
}
sch_tree_unlock(sch);
@@ -303,13 +305,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->queues[band];
- q->queues[band] = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->queues[band]);
return 0;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5abd1d9de989..9640bb39a5d2 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -598,7 +598,8 @@ deliver:
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
qdisc_qstats_drop(sch);
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1,
+ qdisc_pkt_len(skb));
}
}
goto tfifo_dequeue;
@@ -1037,15 +1038,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
{
struct netem_sched_data *q = qdisc_priv(sch);
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- if (*old) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b783a446d884..71ae3b9629f9 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
{
struct pie_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_PIE_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
int err;
if (!opt)
@@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index ba6487f2741f..fee1b15506b2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
if (child != &noop_qdisc) {
- qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
qdisc_destroy(child);
}
}
@@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
q->queues[i] = child;
if (old != &noop_qdisc) {
- qdisc_tree_decrease_qlen(old,
- old->q.qlen);
+ qdisc_tree_reduce_backlog(old,
+ old->q.qlen,
+ old->qstats.backlog);
qdisc_destroy(old);
}
sch_tree_unlock(sch);
@@ -268,13 +269,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->queues[band];
- q->queues[band] = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->queues[band]);
return 0;
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3dc3a6e56052..8d2d8d953432 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
static void qfq_purge_queue(struct qfq_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
@@ -617,11 +618,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- qfq_purge_queue(cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6c0534cc7758..8c0508c0e287 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
q->flags = ctl->flags;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
}
@@ -313,12 +314,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5bbb6332ec57..c69611640fa5 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
@@ -606,12 +607,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3abab534eb5c..498f0a2cb47f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -346,7 +346,7 @@ static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- unsigned int hash;
+ unsigned int hash, dropped;
sfq_index x, qlen;
struct sfq_slot *slot;
int uninitialized_var(ret);
@@ -461,7 +461,7 @@ enqueue:
return NET_XMIT_SUCCESS;
qlen = slot->qlen;
- sfq_drop(sch);
+ dropped = sfq_drop(sch);
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
@@ -469,7 +469,7 @@ enqueue:
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, dropped);
return NET_XMIT_SUCCESS;
}
@@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch)
struct sfq_slot *slot;
struct sk_buff_head list;
int dropped = 0;
+ unsigned int drop_len = 0;
__skb_queue_head_init(&list);
@@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch)
if (x >= SFQ_MAX_FLOWS) {
drop:
qdisc_qstats_backlog_dec(sch, skb);
+ drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
dropped++;
continue;
@@ -594,7 +596,7 @@ drop:
}
}
sch->q.qlen -= dropped;
- qdisc_tree_decrease_qlen(sch, dropped);
+ qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}
static void sfq_perturbation(unsigned long arg)
@@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
struct red_parms *p = NULL;
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
@@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
qlen = sch->q.qlen;
while (sch->q.qlen > q->limit)
- sfq_drop(sch);
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ dropped += sfq_drop(sch);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a4afde14e865..c2fbde742f37 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
+ unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
int ret, nb;
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
nskb = segs->next;
segs->next = NULL;
qdisc_skb_cb(segs)->pkt_len = segs->len;
+ len += segs->len;
ret = qdisc_enqueue(segs, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
}
sch->q.qlen += nb;
if (nb > 1)
- qdisc_tree_decrease_qlen(sch, 1 - nb);
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
consume_skb(skb);
return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
if (child) {
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
}
@@ -502,13 +505,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index a3380917f197..3aa43073e0b9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -70,19 +70,6 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp)
return msg;
}
-void sctp_datamsg_free(struct sctp_datamsg *msg)
-{
- struct sctp_chunk *chunk;
-
- /* This doesn't have to be a _safe vairant because
- * sctp_chunk_free() only drops the refs.
- */
- list_for_each_entry(chunk, &msg->chunks, frag_list)
- sctp_chunk_free(chunk);
-
- sctp_datamsg_put(msg);
-}
-
/* Final destructruction of datamsg memory. */
static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
{
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 52838eaa1582..2522a6175291 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -333,7 +333,7 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
if (!ep->base.bind_addr.port)
goto out;
t = sctp_epaddr_lookup_transport(ep, paddr);
- if (!t || t->asoc->temp)
+ if (!t)
goto out;
*transport = t;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index d9a6e66c5c8a..21a2d6b7abaf 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -784,6 +784,7 @@ hit:
/* rhashtable for transport */
struct sctp_hash_cmp_arg {
+ const struct sctp_endpoint *ep;
const union sctp_addr *laddr;
const union sctp_addr *paddr;
const struct net *net;
@@ -797,15 +798,20 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
struct sctp_association *asoc = t->asoc;
const struct net *net = x->net;
- if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
- return 1;
if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
return 1;
if (!net_eq(sock_net(asoc->base.sk), net))
return 1;
- if (!sctp_bind_addr_match(&asoc->base.bind_addr,
- x->laddr, sctp_sk(asoc->base.sk)))
- return 1;
+ if (x->ep) {
+ if (x->ep != asoc->ep)
+ return 1;
+ } else {
+ if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
+ return 1;
+ if (!sctp_bind_addr_match(&asoc->base.bind_addr,
+ x->laddr, sctp_sk(asoc->base.sk)))
+ return 1;
+ }
return 0;
}
@@ -832,9 +838,11 @@ static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
const struct sctp_hash_cmp_arg *x = data;
const union sctp_addr *paddr = x->paddr;
const struct net *net = x->net;
- u16 lport = x->laddr->v4.sin_port;
+ u16 lport;
u32 addr;
+ lport = x->ep ? htons(x->ep->base.bind_addr.port) :
+ x->laddr->v4.sin_port;
if (paddr->sa.sa_family == AF_INET6)
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
else
@@ -864,12 +872,12 @@ void sctp_transport_hashtable_destroy(void)
void sctp_hash_transport(struct sctp_transport *t)
{
- struct sctp_sockaddr_entry *addr;
struct sctp_hash_cmp_arg arg;
- addr = list_entry(t->asoc->base.bind_addr.address_list.next,
- struct sctp_sockaddr_entry, list);
- arg.laddr = &addr->a;
+ if (t->asoc->temp)
+ return;
+
+ arg.ep = t->asoc->ep;
arg.paddr = &t->ipaddr;
arg.net = sock_net(t->asoc->base.sk);
@@ -881,6 +889,9 @@ reinsert:
void sctp_unhash_transport(struct sctp_transport *t)
{
+ if (t->asoc->temp)
+ return;
+
rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
sctp_hash_params);
}
@@ -891,6 +902,7 @@ struct sctp_transport *sctp_addrs_lookup_transport(
const union sctp_addr *paddr)
{
struct sctp_hash_cmp_arg arg = {
+ .ep = NULL,
.laddr = laddr,
.paddr = paddr,
.net = net,
@@ -904,13 +916,15 @@ struct sctp_transport *sctp_epaddr_lookup_transport(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr)
{
- struct sctp_sockaddr_entry *addr;
struct net *net = sock_net(ep->base.sk);
+ struct sctp_hash_cmp_arg arg = {
+ .ep = ep,
+ .paddr = paddr,
+ .net = net,
+ };
- addr = list_entry(ep->base.bind_addr.address_list.next,
- struct sctp_sockaddr_entry, list);
-
- return sctp_addrs_lookup_transport(net, &addr->a, paddr);
+ return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
+ sctp_hash_params);
}
/* Look up an association. */
@@ -921,15 +935,20 @@ static struct sctp_association *__sctp_lookup_association(
struct sctp_transport **pt)
{
struct sctp_transport *t;
+ struct sctp_association *asoc = NULL;
t = sctp_addrs_lookup_transport(net, local, peer);
- if (!t || t->dead || t->asoc->temp)
- return NULL;
+ if (!t || !sctp_transport_hold(t))
+ goto out;
- sctp_association_hold(t->asoc);
+ asoc = t->asoc;
+ sctp_association_hold(asoc);
*pt = t;
- return t->asoc;
+ sctp_transport_put(t);
+
+out:
+ return asoc;
}
/* Look up an association. protected by RCU read lock */
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index dfa7eeccb537..cfc3c7101a38 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -161,12 +161,9 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
struct sctp_af *af;
primary = &assoc->peer.primary_addr;
- rcu_read_lock();
list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
transports) {
addr = &transport->ipaddr;
- if (transport->dead)
- continue;
af = sctp_get_af_specific(addr->sa.sa_family);
if (af->cmp_addr(addr, primary)) {
@@ -174,7 +171,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
}
af->seq_dump_addr(seq, addr);
}
- rcu_read_unlock();
}
static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
@@ -310,7 +306,7 @@ static struct sctp_transport *sctp_transport_get_next(struct seq_file *seq)
static struct sctp_transport *sctp_transport_get_idx(struct seq_file *seq,
loff_t pos)
{
- void *obj;
+ void *obj = SEQ_START_TOKEN;
while (pos && (obj = sctp_transport_get_next(seq)) && !IS_ERR(obj))
pos--;
@@ -347,7 +343,7 @@ static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
if (err)
return ERR_PTR(err);
- return *pos ? sctp_transport_get_idx(seq, *pos) : SEQ_START_TOKEN;
+ return sctp_transport_get_idx(seq, *pos);
}
static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
@@ -380,6 +376,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
}
transport = (struct sctp_transport *)v;
+ if (!sctp_transport_hold(transport))
+ return 0;
assoc = transport->asoc;
epb = &assoc->base;
sk = epb->sk;
@@ -412,6 +410,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sk->sk_rcvbuf);
seq_printf(seq, "\n");
+ sctp_transport_put(transport);
+
return 0;
}
@@ -462,7 +462,7 @@ static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos)
if (err)
return ERR_PTR(err);
- return *pos ? sctp_transport_get_idx(seq, *pos) : SEQ_START_TOKEN;
+ return sctp_transport_get_idx(seq, *pos);
}
static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -489,12 +489,12 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
}
tsp = (struct sctp_transport *)v;
+ if (!sctp_transport_hold(tsp))
+ return 0;
assoc = tsp->asoc;
list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
transports) {
- if (tsp->dead)
- continue;
/*
* The remote address (ADDR)
*/
@@ -544,6 +544,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\n");
}
+ sctp_transport_put(tsp);
+
return 0;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index ab0d538a74ed..1099e99a53c4 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -60,6 +60,8 @@
#include <net/inet_common.h>
#include <net/inet_ecn.h>
+#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
+
/* Global data structures. */
struct sctp_globals sctp_globals __read_mostly;
@@ -1355,6 +1357,8 @@ static __init int sctp_init(void)
unsigned long limit;
int max_share;
int order;
+ int num_entries;
+ int max_entry_order;
sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
@@ -1407,14 +1411,24 @@ static __init int sctp_init(void)
/* Size and allocate the association hash table.
* The methodology is similar to that of the tcp hash tables.
+ * Though not identical. Start by getting a goal size
*/
if (totalram_pages >= (128 * 1024))
goal = totalram_pages >> (22 - PAGE_SHIFT);
else
goal = totalram_pages >> (24 - PAGE_SHIFT);
- for (order = 0; (1UL << order) < goal; order++)
- ;
+ /* Then compute the page order for said goal */
+ order = get_order(goal);
+
+ /* Now compute the required page order for the maximum sized table we
+ * want to create
+ */
+ max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
+ sizeof(struct sctp_bind_hashbucket));
+
+ /* Limit the page order by that maximum hash table size */
+ order = min(order, max_entry_order);
/* Allocate and initialize the endpoint hash table. */
sctp_ep_hashsize = 64;
@@ -1430,20 +1444,35 @@ static __init int sctp_init(void)
INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
}
- /* Allocate and initialize the SCTP port hash table. */
+ /* Allocate and initialize the SCTP port hash table.
+ * Note that order is initalized to start at the max sized
+ * table we want to support. If we can't get that many pages
+ * reduce the order and try again
+ */
do {
- sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
- sizeof(struct sctp_bind_hashbucket);
- if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
- continue;
sctp_port_hashtable = (struct sctp_bind_hashbucket *)
__get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
} while (!sctp_port_hashtable && --order > 0);
+
if (!sctp_port_hashtable) {
pr_err("Failed bind hash alloc\n");
status = -ENOMEM;
goto err_bhash_alloc;
}
+
+ /* Now compute the number of entries that will fit in the
+ * port hash space we allocated
+ */
+ num_entries = (1UL << order) * PAGE_SIZE /
+ sizeof(struct sctp_bind_hashbucket);
+
+ /* And finish by rounding it down to the nearest power of two
+ * this wastes some memory of course, but its needed because
+ * the hash function operates based on the assumption that
+ * that the number of entries is a power of two
+ */
+ sctp_port_hashsize = rounddown_pow_of_two(num_entries);
+
for (i = 0; i < sctp_port_hashsize; i++) {
spin_lock_init(&sctp_port_hashtable[i].lock);
INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
@@ -1452,7 +1481,8 @@ static __init int sctp_init(void)
if (sctp_transport_hashtable_init())
goto err_thash_alloc;
- pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize);
+ pr_info("Hash tables configured (bind %d/%d)\n", sctp_port_hashsize,
+ num_entries);
sctp_sysctl_register();
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 2e21384697c2..b5327bb77458 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -259,12 +259,6 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
goto out_unlock;
}
- /* Is this transport really dead and just waiting around for
- * the timer to let go of the reference?
- */
- if (transport->dead)
- goto out_unlock;
-
/* Run through the state machine. */
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
@@ -380,12 +374,6 @@ void sctp_generate_heartbeat_event(unsigned long data)
goto out_unlock;
}
- /* Is this structure just waiting around for us to actually
- * get destroyed?
- */
- if (transport->dead)
- goto out_unlock;
-
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
asoc->state, asoc->ep, asoc,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9bb80ec4c08f..b89501e5c1a1 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5538,6 +5538,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
+ int i;
if (!ep->auth_enable)
return -EACCES;
@@ -5555,8 +5556,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
return -EFAULT;
if (put_user(num_idents, &p->shmac_num_idents))
return -EFAULT;
- if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
- return -EFAULT;
+ for (i = 0; i < num_idents; i++) {
+ __u16 hmacid = ntohs(hmacs->hmac_ids[i]);
+
+ if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
+ return -EFAULT;
+ }
return 0;
}
@@ -6101,9 +6106,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
return retval;
}
-static void sctp_hash(struct sock *sk)
+static int sctp_hash(struct sock *sk)
{
/* STUB */
+ return 0;
}
static void sctp_unhash(struct sock *sk)
@@ -6636,6 +6642,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_SACK_IMMEDIATELY |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -6659,6 +6666,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_SACK_IMMEDIATELY |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index aab9e3f29755..a431c14044a4 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -132,8 +132,6 @@ fail:
*/
void sctp_transport_free(struct sctp_transport *transport)
{
- transport->dead = 1;
-
/* Try to delete the heartbeat timer. */
if (del_timer(&transport->hb_timer))
sctp_transport_put(transport);
@@ -169,7 +167,7 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head)
*/
static void sctp_transport_destroy(struct sctp_transport *transport)
{
- if (unlikely(!transport->dead)) {
+ if (unlikely(atomic_read(&transport->refcnt))) {
WARN(1, "Attempt to destroy undead transport %p!\n", transport);
return;
}
@@ -296,9 +294,9 @@ void sctp_transport_route(struct sctp_transport *transport,
}
/* Hold a reference to a transport. */
-void sctp_transport_hold(struct sctp_transport *transport)
+int sctp_transport_hold(struct sctp_transport *transport)
{
- atomic_inc(&transport->refcnt);
+ return atomic_add_unless(&transport->refcnt, 1, 0);
}
/* Release a reference to a transport and clean up
diff --git a/net/socket.c b/net/socket.c
index 91c2de6f5020..c044d1e8508c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -294,7 +294,7 @@ static int init_inodecache(void)
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD | SLAB_ACCOUNT),
init_once);
if (sock_inode_cachep == NULL)
return -ENOMEM;
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 59eeed43eda2..f0c6a8c78a56 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -326,6 +326,9 @@ int gssp_accept_sec_context_upcall(struct net *net,
if (data->found_creds && client_name.data != NULL) {
char *c;
+ data->creds.cr_raw_principal = kstrndup(client_name.data,
+ client_name.len, GFP_KERNEL);
+
data->creds.cr_principal = kstrndup(client_name.data,
client_name.len, GFP_KERNEL);
if (data->creds.cr_principal) {
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 5e4f815c2b34..2b32fd602669 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -771,7 +771,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
if (count == 0)
return 0;
- mutex_lock(&inode->i_mutex); /* protect against multiple concurrent
+ inode_lock(inode); /* protect against multiple concurrent
* readers on this file */
again:
spin_lock(&queue_lock);
@@ -784,7 +784,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
}
if (rp->q.list.next == &cd->queue) {
spin_unlock(&queue_lock);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
WARN_ON_ONCE(rp->offset);
return 0;
}
@@ -838,7 +838,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
}
if (err == -EAGAIN)
goto again;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err ? err : count;
}
@@ -909,9 +909,9 @@ static ssize_t cache_write(struct file *filp, const char __user *buf,
if (!cd->cache_parse)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = cache_downcall(mapping, buf, count, cd);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return ret;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 23608eb0ded2..b7f21044f4d8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1217,6 +1217,7 @@ static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen)
return -EINVAL;
memcpy(buf, &rpc_in6addr_loopback,
sizeof(rpc_in6addr_loopback));
+ break;
default:
dprintk("RPC: %s: address family not supported\n",
__func__);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index d81186d34558..31789ef3e614 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -172,7 +172,7 @@ rpc_close_pipes(struct inode *inode)
int need_release;
LIST_HEAD(free_list);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
spin_lock(&pipe->lock);
need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
pipe->nreaders = 0;
@@ -188,7 +188,7 @@ rpc_close_pipes(struct inode *inode)
cancel_delayed_work_sync(&pipe->queue_timeout);
rpc_inode_setowner(inode, NULL);
RPC_I(inode)->pipe = NULL;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
static struct inode *
@@ -221,7 +221,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
int first_open;
int res = -ENXIO;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
@@ -237,7 +237,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
pipe->nwriters++;
res = 0;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -248,7 +248,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
struct rpc_pipe_msg *msg;
int last_close;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
@@ -278,7 +278,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
if (last_close && pipe->ops->release_pipe)
pipe->ops->release_pipe(inode);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
@@ -290,7 +290,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
struct rpc_pipe_msg *msg;
int res = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
res = -EPIPE;
@@ -322,7 +322,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
pipe->ops->destroy_msg(msg);
}
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -332,11 +332,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
struct inode *inode = file_inode(filp);
int res;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
res = -EPIPE;
if (RPC_I(inode)->pipe != NULL)
res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -349,12 +349,12 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
poll_wait(filp, &rpci->waitq, wait);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (rpci->pipe == NULL)
mask |= POLLERR | POLLHUP;
else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
mask |= POLLIN | POLLRDNORM;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return mask;
}
@@ -367,10 +367,10 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case FIONREAD:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -EPIPE;
}
spin_lock(&pipe->lock);
@@ -381,7 +381,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
len += msg->len - msg->copied;
}
spin_unlock(&pipe->lock);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return put_user(len, (int __user *)arg);
default:
return -EINVAL;
@@ -617,9 +617,9 @@ int rpc_rmdir(struct dentry *dentry)
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
error = __rpc_rmdir(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
@@ -701,9 +701,9 @@ static void rpc_depopulate(struct dentry *parent,
{
struct inode *dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(dir, I_MUTEX_CHILD);
__rpc_depopulate(parent, files, start, eof);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
}
static int rpc_populate(struct dentry *parent,
@@ -715,7 +715,7 @@ static int rpc_populate(struct dentry *parent,
struct dentry *dentry;
int i, err;
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
for (i = start; i < eof; i++) {
dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
err = PTR_ERR(dentry);
@@ -739,11 +739,11 @@ static int rpc_populate(struct dentry *parent,
if (err != 0)
goto out_bad;
}
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return 0;
out_bad:
__rpc_depopulate(parent, files, start, eof);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
__FILE__, __func__, parent);
return err;
@@ -757,7 +757,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
struct inode *dir = d_inode(parent);
int error;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
@@ -770,7 +770,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
goto err_rmdir;
}
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
err_rmdir:
__rpc_rmdir(dir, dentry);
@@ -788,11 +788,11 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
if (depopulate != NULL)
depopulate(dentry);
error = __rpc_rmdir(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
@@ -828,7 +828,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (pipe->ops->downcall == NULL)
umode &= ~S_IWUGO;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
@@ -837,7 +837,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (err)
goto out_err;
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
out_err:
dentry = ERR_PTR(err);
@@ -865,9 +865,9 @@ rpc_unlink(struct dentry *dentry)
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
error = __rpc_rmpipe(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
@@ -1500,7 +1500,7 @@ int register_rpc_pipefs(void)
rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
sizeof(struct rpc_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (!rpc_inode_cachep)
return -ENOMEM;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index a6cbb2104667..7422f28818b2 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -10,11 +10,13 @@
#include <linux/kthread.h>
#include <linux/slab.h>
#include <net/sock.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprt.h>
#include <linux/module.h>
+#include <linux/netdevice.h>
#include <trace/events/sunrpc.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
@@ -938,6 +940,49 @@ static void svc_age_temp_xprts(unsigned long closure)
mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}
+/* Close temporary transports whose xpt_local matches server_addr immediately
+ * instead of waiting for them to be picked up by the timer.
+ *
+ * This is meant to be called from a notifier_block that runs when an ip
+ * address is deleted.
+ */
+void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
+{
+ struct svc_xprt *xprt;
+ struct svc_sock *svsk;
+ struct socket *sock;
+ struct list_head *le, *next;
+ LIST_HEAD(to_be_closed);
+ struct linger no_linger = {
+ .l_onoff = 1,
+ .l_linger = 0,
+ };
+
+ spin_lock_bh(&serv->sv_lock);
+ list_for_each_safe(le, next, &serv->sv_tempsocks) {
+ xprt = list_entry(le, struct svc_xprt, xpt_list);
+ if (rpc_cmp_addr(server_addr, (struct sockaddr *)
+ &xprt->xpt_local)) {
+ dprintk("svc_age_temp_xprts_now: found %p\n", xprt);
+ list_move(le, &to_be_closed);
+ }
+ }
+ spin_unlock_bh(&serv->sv_lock);
+
+ while (!list_empty(&to_be_closed)) {
+ le = to_be_closed.next;
+ list_del_init(le);
+ xprt = list_entry(le, struct svc_xprt, xpt_list);
+ dprintk("svc_age_temp_xprts_now: closing %p\n", xprt);
+ svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ sock = svsk->sk_sock;
+ kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
+ (char *)&no_linger, sizeof(no_linger));
+ svc_close_xprt(xprt);
+ }
+}
+EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now);
+
static void call_xpt_users(struct svc_xprt *xprt)
{
struct svc_xpt_user *u;
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 79c0f3459b5c..69841db1f533 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -55,6 +55,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
spin_unlock(&authtab_lock);
rqstp->rq_auth_slack = 0;
+ init_svc_cred(&rqstp->rq_cred);
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
@@ -63,6 +64,7 @@ EXPORT_SYMBOL_GPL(svc_authenticate);
int svc_set_client(struct svc_rqst *rqstp)
{
+ rqstp->rq_client = NULL;
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL_GPL(svc_set_client);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 621ca7b4a155..dfacdc95b3f5 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -728,10 +728,6 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
- cred->cr_group_info = NULL;
- cred->cr_principal = NULL;
- rqstp->rq_client = NULL;
-
if (argv->iov_len < 3*4)
return SVC_GARBAGE;
@@ -794,10 +790,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
u32 slen, i;
int len = argv->iov_len;
- cred->cr_group_info = NULL;
- cred->cr_principal = NULL;
- rqstp->rq_client = NULL;
-
if ((len -= 3*4) < 0)
return SVC_GARBAGE;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2e98f4a243e5..37edea6fa92d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt)
if (atomic_dec_and_test(&xprt->count))
xprt_destroy(xprt);
}
+EXPORT_SYMBOL_GPL(xprt_put);
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 33f99d3004f2..dc9f3b513a05 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
rpcrdma-y := transport.o rpc_rdma.o verbs.o \
fmr_ops.o frwr_ops.o physical_ops.o \
- svc_rdma.o svc_rdma_transport.o \
+ svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
module.o
rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2dcb44f69e53..cc1251d07297 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -15,7 +15,7 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-#define RPCRDMA_BACKCHANNEL_DEBUG
+#undef RPCRDMA_BACKCHANNEL_DEBUG
static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
size_t size;
req = rpcrdma_create_req(r_xprt);
- if (!req)
- return -ENOMEM;
+ if (IS_ERR(req))
+ return PTR_ERR(req);
req->rl_backchannel = true;
size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
@@ -84,9 +84,7 @@ out_fail:
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
- struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
- unsigned long flags;
int rc = 0;
while (count--) {
@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
break;
}
- spin_lock_irqsave(&buffers->rb_lock, flags);
- list_add(&rep->rr_list, &buffers->rb_recv_bufs);
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ rpcrdma_recv_buffer_put(rep);
}
return rc;
@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
__func__);
goto out_free;
}
+ dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list);
@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
rpclen = rqst->rq_svec[0].iov_len;
+#ifdef RPCRDMA_BACKCHANNEL_DEBUG
pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
__func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
pr_info("RPC: %s: RPC/RDMA: %*ph\n",
__func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
pr_info("RPC: %s: RPC: %*ph\n",
__func__, (int)rpclen, rqst->rq_svec[0].iov_base);
+#endif
req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
struct rpc_xprt *xprt = rqst->rq_xprt;
+ dprintk("RPC: %s: freeing rqst %p (req %p)\n",
+ __func__, rqst, rpcr_to_rdmar(rqst));
+
smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
-#ifdef RPCRDMA_BACKCHANNEL_DEBUG
- pr_info("RPC: %s: using rqst %p\n", __func__, rqst);
-#endif
+ dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* direction reply.
*/
req = rpcr_to_rdmar(rqst);
-#ifdef RPCRDMA_BACKCHANNEL_DEBUG
- pr_info("RPC: %s: attaching rep %p to req %p\n",
+ dprintk("RPC: %s: attaching rep %p to req %p\n",
__func__, rep, req);
-#endif
req->rl_reply = rep;
/* Defeat the retransmit detection logic in send_request */
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f1e8dafbd507..c14f3a4bff68 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -179,6 +179,69 @@ out_maperr:
return rc;
}
+static void
+__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+ struct ib_device *device = r_xprt->rx_ia.ri_device;
+ struct rpcrdma_mw *mw = seg->rl_mw;
+ int nsegs = seg->mr_nsegs;
+
+ seg->rl_mw = NULL;
+
+ while (nsegs--)
+ rpcrdma_unmap_one(device, seg++);
+
+ rpcrdma_put_mw(r_xprt, mw);
+}
+
+/* Invalidate all memory regions that were registered for "req".
+ *
+ * Sleeps until it is safe for the host CPU to access the
+ * previously mapped memory regions.
+ */
+static void
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct rpcrdma_mr_seg *seg;
+ unsigned int i, nchunks;
+ struct rpcrdma_mw *mw;
+ LIST_HEAD(unmap_list);
+ int rc;
+
+ dprintk("RPC: %s: req %p\n", __func__, req);
+
+ /* ORDER: Invalidate all of the req's MRs first
+ *
+ * ib_unmap_fmr() is slow, so use a single call instead
+ * of one call per mapped MR.
+ */
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
+ mw = seg->rl_mw;
+
+ list_add(&mw->r.fmr.fmr->list, &unmap_list);
+
+ i += seg->mr_nsegs;
+ }
+ rc = ib_unmap_fmr(&unmap_list);
+ if (rc)
+ pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
+
+ /* ORDER: Now DMA unmap all of the req's MRs, and return
+ * them to the free MW list.
+ */
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
+
+ __fmr_dma_unmap(r_xprt, seg);
+
+ i += seg->mr_nsegs;
+ seg->mr_nsegs = 0;
+ }
+
+ req->rl_nchunks = 0;
+}
+
/* Use the ib_unmap_fmr() verb to prevent further remote
* access via RDMA READ or RDMA WRITE.
*/
@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
+ .ro_unmap_sync = fmr_op_unmap_sync,
.ro_unmap = fmr_op_unmap,
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 88cf9e7269c2..e16567389e28 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -190,12 +190,11 @@ static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr *devattr = &ia->ri_devattr;
int depth, delta;
ia->ri_max_frmr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- devattr->max_fast_reg_page_list_len);
+ ia->ri_device->attrs.max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
__func__, ia->ri_max_frmr_depth);
@@ -222,8 +221,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
}
ep->rep_attr.cap.max_send_wr *= depth;
- if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
- cdata->max_requests = devattr->max_qp_wr / depth;
+ if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
+ cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
@@ -245,12 +244,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
}
-/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
+/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
+ * to be reset.
+ *
+ * WARNING: Only wr_id and status are reliable at this point
+ */
static void
-frwr_sendcompletion(struct ib_wc *wc)
+__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
{
- struct rpcrdma_mw *r;
-
if (likely(wc->status == IB_WC_SUCCESS))
return;
@@ -261,9 +262,23 @@ frwr_sendcompletion(struct ib_wc *wc)
else
pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
__func__, r, ib_wc_status_msg(wc->status), wc->status);
+
r->r.frmr.fr_state = FRMR_IS_STALE;
}
+static void
+frwr_sendcompletion(struct ib_wc *wc)
+{
+ struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+ struct rpcrdma_frmr *f = &r->r.frmr;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ __frwr_sendcompletion_flush(wc, r);
+
+ if (f->fr_waiter)
+ complete(&f->fr_linv_done);
+}
+
static int
frwr_op_init(struct rpcrdma_xprt *r_xprt)
{
@@ -319,7 +334,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mw *mw;
struct rpcrdma_frmr *frmr;
struct ib_mr *mr;
- struct ib_reg_wr reg_wr;
+ struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
int rc, i, n, dma_nents;
u8 key;
@@ -335,7 +350,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
} while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->r.frmr;
frmr->fr_state = FRMR_IS_VALID;
+ frmr->fr_waiter = false;
mr = frmr->fr_mr;
+ reg_wr = &frmr->fr_regwr;
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
@@ -381,19 +398,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
- reg_wr.wr.next = NULL;
- reg_wr.wr.opcode = IB_WR_REG_MR;
- reg_wr.wr.wr_id = (uintptr_t)mw;
- reg_wr.wr.num_sge = 0;
- reg_wr.wr.send_flags = 0;
- reg_wr.mr = mr;
- reg_wr.key = mr->rkey;
- reg_wr.access = writing ?
- IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
- IB_ACCESS_REMOTE_READ;
+ reg_wr->wr.next = NULL;
+ reg_wr->wr.opcode = IB_WR_REG_MR;
+ reg_wr->wr.wr_id = (uintptr_t)mw;
+ reg_wr->wr.num_sge = 0;
+ reg_wr->wr.send_flags = 0;
+ reg_wr->mr = mr;
+ reg_wr->key = mr->rkey;
+ reg_wr->access = writing ?
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+ IB_ACCESS_REMOTE_READ;
DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
+ rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc)
goto out_senderr;
@@ -413,6 +430,116 @@ out_senderr:
return rc;
}
+static struct ib_send_wr *
+__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
+{
+ struct rpcrdma_mw *mw = seg->rl_mw;
+ struct rpcrdma_frmr *f = &mw->r.frmr;
+ struct ib_send_wr *invalidate_wr;
+
+ f->fr_waiter = false;
+ f->fr_state = FRMR_IS_INVALID;
+ invalidate_wr = &f->fr_invwr;
+
+ memset(invalidate_wr, 0, sizeof(*invalidate_wr));
+ invalidate_wr->wr_id = (unsigned long)(void *)mw;
+ invalidate_wr->opcode = IB_WR_LOCAL_INV;
+ invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
+
+ return invalidate_wr;
+}
+
+static void
+__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+ int rc)
+{
+ struct ib_device *device = r_xprt->rx_ia.ri_device;
+ struct rpcrdma_mw *mw = seg->rl_mw;
+ struct rpcrdma_frmr *f = &mw->r.frmr;
+
+ seg->rl_mw = NULL;
+
+ ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir);
+
+ if (!rc)
+ rpcrdma_put_mw(r_xprt, mw);
+ else
+ __frwr_queue_recovery(mw);
+}
+
+/* Invalidate all memory regions that were registered for "req".
+ *
+ * Sleeps until it is safe for the host CPU to access the
+ * previously mapped memory regions.
+ */
+static void
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_mr_seg *seg;
+ unsigned int i, nchunks;
+ struct rpcrdma_frmr *f;
+ int rc;
+
+ dprintk("RPC: %s: req %p\n", __func__, req);
+
+ /* ORDER: Invalidate all of the req's MRs first
+ *
+ * Chain the LOCAL_INV Work Requests and post them with
+ * a single ib_post_send() call.
+ */
+ invalidate_wrs = pos = prev = NULL;
+ seg = NULL;
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
+
+ pos = __frwr_prepare_linv_wr(seg);
+
+ if (!invalidate_wrs)
+ invalidate_wrs = pos;
+ else
+ prev->next = pos;
+ prev = pos;
+
+ i += seg->mr_nsegs;
+ }
+ f = &seg->rl_mw->r.frmr;
+
+ /* Strong send queue ordering guarantees that when the
+ * last WR in the chain completes, all WRs in the chain
+ * are complete.
+ */
+ f->fr_invwr.send_flags = IB_SEND_SIGNALED;
+ f->fr_waiter = true;
+ init_completion(&f->fr_linv_done);
+ INIT_CQCOUNT(&r_xprt->rx_ep);
+
+ /* Transport disconnect drains the receive CQ before it
+ * replaces the QP. The RPC reply handler won't call us
+ * unless ri_id->qp is a valid pointer.
+ */
+ rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
+ if (rc)
+ pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
+
+ wait_for_completion(&f->fr_linv_done);
+
+ /* ORDER: Now DMA unmap all of the req's MRs, and return
+ * them to the free MW list.
+ */
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
+
+ __frwr_dma_unmap(r_xprt, seg, rc);
+
+ i += seg->mr_nsegs;
+ seg->mr_nsegs = 0;
+ }
+
+ req->rl_nchunks = 0;
+}
+
/* Post a LOCAL_INV Work Request to prevent further remote access
* via RDMA READ or RDMA WRITE.
*/
@@ -423,23 +550,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw = seg1->rl_mw;
struct rpcrdma_frmr *frmr = &mw->r.frmr;
- struct ib_send_wr invalidate_wr, *bad_wr;
+ struct ib_send_wr *invalidate_wr, *bad_wr;
int rc, nsegs = seg->mr_nsegs;
dprintk("RPC: %s: FRMR %p\n", __func__, mw);
seg1->rl_mw = NULL;
frmr->fr_state = FRMR_IS_INVALID;
+ invalidate_wr = &mw->r.frmr.fr_invwr;
- memset(&invalidate_wr, 0, sizeof(invalidate_wr));
- invalidate_wr.wr_id = (unsigned long)(void *)mw;
- invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
+ memset(invalidate_wr, 0, sizeof(*invalidate_wr));
+ invalidate_wr->wr_id = (uintptr_t)mw;
+ invalidate_wr->opcode = IB_WR_LOCAL_INV;
+ invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
read_lock(&ia->ri_qplock);
- rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+ rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
read_unlock(&ia->ri_qplock);
if (rc)
goto out_err;
@@ -471,6 +599,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_unmap_sync = frwr_op_unmap_sync,
.ro_unmap = frwr_op_unmap,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 617b76f22154..dbb302ecf590 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
return 1;
}
+/* DMA unmap all memory regions that were mapped for "req".
+ */
+static void
+physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct ib_device *device = r_xprt->rx_ia.ri_device;
+ unsigned int i;
+
+ for (i = 0; req->rl_nchunks; --req->rl_nchunks)
+ rpcrdma_unmap_one(device, &req->rl_segments[i++]);
+}
+
static void
physical_op_destroy(struct rpcrdma_buffer *buf)
{
@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
.ro_map = physical_op_map,
+ .ro_unmap_sync = physical_op_unmap_sync,
.ro_unmap = physical_op_unmap,
.ro_open = physical_op_open,
.ro_maxpages = physical_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c10d9699441c..0f28f2d743ed 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
if (req->rl_reply)
goto out_duplicate;
+ /* Sanity checking has passed. We are now committed
+ * to complete this transaction.
+ */
+ list_del_init(&rqst->rq_list);
+ spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
" RPC request 0x%p xid 0x%08x\n",
__func__, rep, req, rqst,
@@ -888,12 +893,23 @@ badheader:
break;
}
+ /* Invalidate and flush the data payloads before waking the
+ * waiting application. This guarantees the memory region is
+ * properly fenced from the server before the application
+ * accesses the data. It also ensures proper send flow
+ * control: waking the next RPC waits until this RPC has
+ * relinquished all its Send Queue entries.
+ */
+ if (req->rl_nchunks)
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
+
credits = be32_to_cpu(headerp->rm_credit);
if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_buf.rb_max_requests)
credits = r_xprt->rx_buf.rb_max_requests;
+ spin_lock_bh(&xprt->transport_lock);
cwnd = xprt->cwnd;
xprt->cwnd = credits << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 1b7051bdbdc8..c846ca9f1eba 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
+unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
@@ -71,10 +72,6 @@ atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
-/* Temporary NFS request map and context caches */
-struct kmem_cache *svc_rdma_map_cachep;
-struct kmem_cache *svc_rdma_ctxt_cachep;
-
struct workqueue_struct *svc_rdma_wq;
/*
@@ -243,17 +240,16 @@ void svc_rdma_cleanup(void)
svc_unreg_xprt_class(&svc_rdma_bc_class);
#endif
svc_unreg_xprt_class(&svc_rdma_class);
- kmem_cache_destroy(svc_rdma_map_cachep);
- kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
- dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
- dprintk("\tsq_depth : %d\n",
+ dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
+ dprintk("\tsq_depth : %u\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
+ dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
@@ -264,39 +260,10 @@ int svc_rdma_init(void)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
- /* Create the temporary map cache */
- svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
- sizeof(struct svc_rdma_req_map),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!svc_rdma_map_cachep) {
- printk(KERN_INFO "Could not allocate map cache.\n");
- goto err0;
- }
-
- /* Create the temporary context cache */
- svc_rdma_ctxt_cachep =
- kmem_cache_create("svc_rdma_ctxt_cache",
- sizeof(struct svc_rdma_op_ctxt),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!svc_rdma_ctxt_cachep) {
- printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
- goto err1;
- }
-
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
svc_reg_xprt_class(&svc_rdma_bc_class);
#endif
return 0;
- err1:
- kmem_cache_destroy(svc_rdma_map_cachep);
- err0:
- unregister_sysctl_table(svcrdma_table_header);
- destroy_workqueue(svc_rdma_wq);
- return -ENOMEM;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
new file mode 100644
index 000000000000..65a7c232a345
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2015 Oracle. All rights reserved.
+ *
+ * Support for backward direction RPCs on RPC/RDMA (server-side).
+ */
+
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+
+#undef SVCRDMA_BACKCHANNEL_DEBUG
+
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+ struct xdr_buf *rcvbuf)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct kvec *dst, *src = &rcvbuf->head[0];
+ struct rpc_rqst *req;
+ unsigned long cwnd;
+ u32 credits;
+ size_t len;
+ __be32 xid;
+ __be32 *p;
+ int ret;
+
+ p = (__be32 *)src->iov_base;
+ len = src->iov_len;
+ xid = rmsgp->rm_xid;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+ pr_info("%s: xid=%08x, length=%zu\n",
+ __func__, be32_to_cpu(xid), len);
+ pr_info("%s: RPC/RDMA: %*ph\n",
+ __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+ pr_info("%s: RPC: %*ph\n",
+ __func__, (int)len, p);
+#endif
+
+ ret = -EAGAIN;
+ if (src->iov_len < 24)
+ goto out_shortreply;
+
+ spin_lock_bh(&xprt->transport_lock);
+ req = xprt_lookup_rqst(xprt, xid);
+ if (!req)
+ goto out_notfound;
+
+ dst = &req->rq_private_buf.head[0];
+ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+ if (dst->iov_len < len)
+ goto out_unlock;
+ memcpy(dst->iov_base, p, len);
+
+ credits = be32_to_cpu(rmsgp->rm_credit);
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
+ credits = r_xprt->rx_buf.rb_bc_max_requests;
+
+ cwnd = xprt->cwnd;
+ xprt->cwnd = credits << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(req->rq_task);
+
+ ret = 0;
+ xprt_complete_rqst(req->rq_task, rcvbuf->len);
+ rcvbuf->len = 0;
+
+out_unlock:
+ spin_unlock_bh(&xprt->transport_lock);
+out:
+ return ret;
+
+out_shortreply:
+ dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
+ xprt, src->iov_len);
+ goto out;
+
+out_notfound:
+ dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
+ xprt, be32_to_cpu(xid));
+
+ goto out_unlock;
+}
+
+/* Send a backwards direction RPC call.
+ *
+ * Caller holds the connection's mutex and has already marshaled
+ * the RPC/RDMA request.
+ *
+ * This is similar to svc_rdma_reply, but takes an rpc_rqst
+ * instead, does not support chunks, and avoids blocking memory
+ * allocation.
+ *
+ * XXX: There is still an opportunity to block in svc_rdma_send()
+ * if there are no SQ entries to post the Send. This may occur if
+ * the adapter has a small maximum SQ depth.
+ */
+static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
+ struct rpc_rqst *rqst)
+{
+ struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
+ struct svc_rdma_op_ctxt *ctxt;
+ struct svc_rdma_req_map *vec;
+ struct ib_send_wr send_wr;
+ int ret;
+
+ vec = svc_rdma_get_req_map(rdma);
+ ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
+ if (ret)
+ goto out_err;
+
+ /* Post a recv buffer to handle the reply for this request. */
+ ret = svc_rdma_post_recv(rdma, GFP_NOIO);
+ if (ret) {
+ pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
+ ret);
+ pr_err("svcrdma: closing transport %p.\n", rdma);
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ ret = -ENOTCONN;
+ goto out_err;
+ }
+
+ ctxt = svc_rdma_get_context(rdma);
+ ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
+ ctxt->count = 1;
+
+ ctxt->wr_op = IB_WR_SEND;
+ ctxt->direction = DMA_TO_DEVICE;
+ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
+ ctxt->sge[0].length = sndbuf->len;
+ ctxt->sge[0].addr =
+ ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
+ sndbuf->len, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
+ ret = -EIO;
+ goto out_unmap;
+ }
+ atomic_inc(&rdma->sc_dma_used);
+
+ memset(&send_wr, 0, sizeof(send_wr));
+ send_wr.wr_id = (unsigned long)ctxt;
+ send_wr.sg_list = ctxt->sge;
+ send_wr.num_sge = 1;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = svc_rdma_send(rdma, &send_wr);
+ if (ret) {
+ ret = -EIO;
+ goto out_unmap;
+ }
+
+out_err:
+ svc_rdma_put_req_map(rdma, vec);
+ dprintk("svcrdma: %s returns %d\n", __func__, ret);
+ return ret;
+
+out_unmap:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 1);
+ goto out_err;
+}
+
+/* Server-side transport endpoint wants a whole page for its send
+ * buffer. The client RPC code constructs the RPC header in this
+ * buffer before it invokes ->send_request.
+ *
+ * Returns NULL if there was a temporary allocation failure.
+ */
+static void *
+xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ struct svcxprt_rdma *rdma;
+ struct page *page;
+
+ rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+
+ /* Prevent an infinite loop: try to make this case work */
+ if (size > PAGE_SIZE)
+ WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
+ size);
+
+ page = alloc_page(RPCRDMA_DEF_GFP);
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
+static void
+xprt_rdma_bc_free(void *buffer)
+{
+ /* No-op: ctxt and page have already been freed. */
+}
+
+static int
+rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
+{
+ struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+ int rc;
+
+ /* Space in the send buffer for an RPC/RDMA header is reserved
+ * via xprt->tsh_size.
+ */
+ headerp->rm_xid = rqst->rq_xid;
+ headerp->rm_vers = rpcrdma_version;
+ headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+ headerp->rm_type = rdma_msg;
+ headerp->rm_body.rm_chunks[0] = xdr_zero;
+ headerp->rm_body.rm_chunks[1] = xdr_zero;
+ headerp->rm_body.rm_chunks[2] = xdr_zero;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+ pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
+#endif
+
+ rc = svc_rdma_bc_sendto(rdma, rqst);
+ if (rc)
+ goto drop_connection;
+ return rc;
+
+drop_connection:
+ dprintk("svcrdma: failed to send bc call\n");
+ xprt_disconnect_done(xprt);
+ return -ENOTCONN;
+}
+
+/* Send an RPC call on the passive end of a transport
+ * connection.
+ */
+static int
+xprt_rdma_bc_send_request(struct rpc_task *task)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ struct svcxprt_rdma *rdma;
+ int ret;
+
+ dprintk("svcrdma: sending bc call with xid: %08x\n",
+ be32_to_cpu(rqst->rq_xid));
+
+ if (!mutex_trylock(&sxprt->xpt_mutex)) {
+ rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
+ if (!mutex_trylock(&sxprt->xpt_mutex))
+ return -EAGAIN;
+ rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
+ }
+
+ ret = -ENOTCONN;
+ rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+ if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+
+ mutex_unlock(&sxprt->xpt_mutex);
+
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static void
+xprt_rdma_bc_close(struct rpc_xprt *xprt)
+{
+ dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+}
+
+static void
+xprt_rdma_bc_put(struct rpc_xprt *xprt)
+{
+ dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+
+ xprt_free(xprt);
+ module_put(THIS_MODULE);
+}
+
+static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+ .reserve_xprt = xprt_reserve_xprt_cong,
+ .release_xprt = xprt_release_xprt_cong,
+ .alloc_slot = xprt_alloc_slot,
+ .release_request = xprt_release_rqst_cong,
+ .buf_alloc = xprt_rdma_bc_allocate,
+ .buf_free = xprt_rdma_bc_free,
+ .send_request = xprt_rdma_bc_send_request,
+ .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .close = xprt_rdma_bc_close,
+ .destroy = xprt_rdma_bc_put,
+ .print_stats = xprt_rdma_print_stats
+};
+
+static const struct rpc_timeout xprt_rdma_bc_timeout = {
+ .to_initval = 60 * HZ,
+ .to_maxval = 60 * HZ,
+};
+
+/* It shouldn't matter if the number of backchannel session slots
+ * doesn't match the number of RPC/RDMA credits. That just means
+ * one or the other will have extra slots that aren't used.
+ */
+static struct rpc_xprt *
+xprt_setup_rdma_bc(struct xprt_create *args)
+{
+ struct rpc_xprt *xprt;
+ struct rpcrdma_xprt *new_xprt;
+
+ if (args->addrlen > sizeof(xprt->addr)) {
+ dprintk("RPC: %s: address too large\n", __func__);
+ return ERR_PTR(-EBADF);
+ }
+
+ xprt = xprt_alloc(args->net, sizeof(*new_xprt),
+ RPCRDMA_MAX_BC_REQUESTS,
+ RPCRDMA_MAX_BC_REQUESTS);
+ if (!xprt) {
+ dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
+ __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ xprt->timeout = &xprt_rdma_bc_timeout;
+ xprt_set_bound(xprt);
+ xprt_set_connected(xprt);
+ xprt->bind_timeout = RPCRDMA_BIND_TO;
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+
+ xprt->prot = XPRT_TRANSPORT_BC_RDMA;
+ xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
+ xprt->ops = &xprt_rdma_bc_procs;
+
+ memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+ xprt->addrlen = args->addrlen;
+ xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
+ xprt->resvport = 0;
+
+ xprt->max_payload = xprt_rdma_max_inline_read;
+
+ new_xprt = rpcx_to_rdmax(xprt);
+ new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
+
+ xprt_get(xprt);
+ args->bc_xprt->xpt_bc_xprt = xprt;
+ xprt->bc_xprt = args->bc_xprt;
+
+ if (!try_module_get(THIS_MODULE))
+ goto out_fail;
+
+ /* Final put for backchannel xprt is in __svc_rdma_free */
+ xprt_get(xprt);
+ return xprt;
+
+out_fail:
+ xprt_rdma_free_addresses(xprt);
+ args->bc_xprt->xpt_bc_xprt = NULL;
+ xprt_put(xprt);
+ xprt_free(xprt);
+ return ERR_PTR(-EINVAL);
+}
+
+struct xprt_class xprt_rdma_bc = {
+ .list = LIST_HEAD_INIT(xprt_rdma_bc.list),
+ .name = "rdma backchannel",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_BC_RDMA,
+ .setup = xprt_setup_rdma_bc,
+};
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ff4f01e527ec..c8b8a8b4181e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
head->arg.page_len += len;
+
head->arg.len += len;
if (!pg_off)
head->count++;
@@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
goto err;
atomic_inc(&xprt->sc_dma_used);
- /* The lkey here is either a local dma lkey or a dma_mr lkey */
- ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[pno].length = len;
ctxt->count++;
@@ -567,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
return ret;
}
+/* By convention, backchannel calls arrive via rdma_msg type
+ * messages, and never populate the chunk lists. This makes
+ * the RPC/RDMA header small and fixed in size, so it is
+ * straightforward to check the RPC header's direction field.
+ */
+static bool
+svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+{
+ __be32 *p = (__be32 *)rmsgp;
+
+ if (!xprt->xpt_bc_xprt)
+ return false;
+
+ if (rmsgp->rm_type != rdma_msg)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+ return false;
+
+ /* sanity */
+ if (p[7] != rmsgp->rm_xid)
+ return false;
+ /* call direction */
+ if (p[8] == cpu_to_be32(RPC_CALL))
+ return false;
+
+ return true;
+}
+
/*
* Set up the rqstp thread context to point to the RQ buffer. If
* necessary, pull additional data from the client with an RDMA_READ
@@ -632,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto close_out;
}
+ if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
+ ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+ &rqstp->rq_arg);
+ svc_rdma_put_context(ctxt, 0);
+ if (ret)
+ goto repost;
+ return ret;
+ }
+
/* Read read-list data. */
ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
@@ -668,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
set_bit(XPT_CLOSE, &xprt->xpt_flags);
defer:
return 0;
+
+repost:
+ ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
+ if (ret) {
+ pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
+ ret);
+ pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
+ set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
+ ret = -ENOTCONN;
+ }
+ return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 969a1ab75fc3..df57f3ce6cd2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -50,9 +50,9 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-static int map_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
+int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
@@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt,
if (xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
- pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+ pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
return -EIO;
}
@@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt,
sge_no++;
}
- dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+ dprintk("svcrdma: %s: sge_no %d page_no %d "
"page_base %u page_len %u head_len %zu tail_len %zu\n",
- sge_no, page_no, xdr->page_base, xdr->page_len,
+ __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
xdr->head[0].iov_len, xdr->tail[0].iov_len);
vec->count = sge_no;
@@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge[sge_no].addr))
goto err;
atomic_inc(&xprt->sc_dma_used);
- sge[sge_no].lkey = xprt->sc_dma_lkey;
+ sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count++;
sge_off = 0;
sge_no++;
@@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
int ret;
/* Post a recv buffer to handle another request. */
- ret = svc_rdma_post_recv(rdma);
+ ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
if (ret) {
printk(KERN_INFO
"svcrdma: could not post a receive buffer, err=%d."
@@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
- ctxt->sge[0].lkey = rdma->sc_dma_lkey;
+ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
@@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[sge_no].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
- ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+ ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
if (byte_count != 0) {
@@ -591,14 +591,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
- vec = svc_rdma_get_req_map();
- ret = map_xdr(rdma, &rqstp->rq_res, vec);
+ vec = svc_rdma_get_req_map(rdma);
+ ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
if (ret)
goto err0;
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
- res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ ret = -ENOMEM;
+ res_page = alloc_page(GFP_KERNEL);
+ if (!res_page)
+ goto err0;
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
@@ -630,14 +633,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes);
- svc_rdma_put_req_map(vec);
+ svc_rdma_put_req_map(rdma, vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
err1:
put_page(res_page);
err0:
- svc_rdma_put_req_map(vec);
+ svc_rdma_put_req_map(rdma, vec);
svc_rdma_put_context(ctxt, 0);
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index b348b4adef29..5763825d09bf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
+ gfp_t flags)
{
struct svc_rdma_op_ctxt *ctxt;
- ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
- GFP_KERNEL | __GFP_NOFAIL);
- ctxt->xprt = xprt;
- INIT_LIST_HEAD(&ctxt->dto_q);
+ ctxt = kmalloc(sizeof(*ctxt), flags);
+ if (ctxt) {
+ ctxt->xprt = xprt;
+ INIT_LIST_HEAD(&ctxt->free);
+ INIT_LIST_HEAD(&ctxt->dto_q);
+ }
+ return ctxt;
+}
+
+static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
+{
+ unsigned int i;
+
+ /* Each RPC/RDMA credit can consume a number of send
+ * and receive WQEs. One ctxt is allocated for each.
+ */
+ i = xprt->sc_sq_depth + xprt->sc_rq_depth;
+
+ while (i--) {
+ struct svc_rdma_op_ctxt *ctxt;
+
+ ctxt = alloc_ctxt(xprt, GFP_KERNEL);
+ if (!ctxt) {
+ dprintk("svcrdma: No memory for RDMA ctxt\n");
+ return false;
+ }
+ list_add(&ctxt->free, &xprt->sc_ctxts);
+ }
+ return true;
+}
+
+struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_op_ctxt *ctxt = NULL;
+
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used++;
+ if (list_empty(&xprt->sc_ctxts))
+ goto out_empty;
+
+ ctxt = list_first_entry(&xprt->sc_ctxts,
+ struct svc_rdma_op_ctxt, free);
+ list_del_init(&ctxt->free);
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+out:
ctxt->count = 0;
ctxt->frmr = NULL;
- atomic_inc(&xprt->sc_ctxt_used);
return ctxt;
+
+out_empty:
+ /* Either pre-allocation missed the mark, or send
+ * queue accounting is broken.
+ */
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+ ctxt = alloc_ctxt(xprt, GFP_NOIO);
+ if (ctxt)
+ goto out;
+
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used--;
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+ WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
+ return NULL;
}
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
@@ -174,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
/*
* Unmap the DMA addr in the SGE if the lkey matches
- * the sc_dma_lkey, otherwise, ignore it since it is
+ * the local_dma_lkey, otherwise, ignore it since it is
* an FRMR lkey and will be unmapped later when the
* last WR that uses it completes.
*/
- if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+ if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
atomic_dec(&xprt->sc_dma_used);
ib_dma_unmap_page(xprt->sc_cm_id->device,
ctxt->sge[i].addr,
@@ -190,35 +248,108 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
{
- struct svcxprt_rdma *xprt;
+ struct svcxprt_rdma *xprt = ctxt->xprt;
int i;
- xprt = ctxt->xprt;
if (free_pages)
for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]);
- kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
- atomic_dec(&xprt->sc_ctxt_used);
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used--;
+ list_add(&ctxt->free, &xprt->sc_ctxts);
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
}
-/*
- * Temporary NFS req mappings are shared across all transport
- * instances. These are short lived and should be bounded by the number
- * of concurrent server threads * depth of the SQ.
- */
-struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
+{
+ while (!list_empty(&xprt->sc_ctxts)) {
+ struct svc_rdma_op_ctxt *ctxt;
+
+ ctxt = list_first_entry(&xprt->sc_ctxts,
+ struct svc_rdma_op_ctxt, free);
+ list_del(&ctxt->free);
+ kfree(ctxt);
+ }
+}
+
+static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
{
struct svc_rdma_req_map *map;
- map = kmem_cache_alloc(svc_rdma_map_cachep,
- GFP_KERNEL | __GFP_NOFAIL);
+
+ map = kmalloc(sizeof(*map), flags);
+ if (map)
+ INIT_LIST_HEAD(&map->free);
+ return map;
+}
+
+static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
+{
+ unsigned int i;
+
+ /* One for each receive buffer on this connection. */
+ i = xprt->sc_max_requests;
+
+ while (i--) {
+ struct svc_rdma_req_map *map;
+
+ map = alloc_req_map(GFP_KERNEL);
+ if (!map) {
+ dprintk("svcrdma: No memory for request map\n");
+ return false;
+ }
+ list_add(&map->free, &xprt->sc_maps);
+ }
+ return true;
+}
+
+struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_req_map *map = NULL;
+
+ spin_lock(&xprt->sc_map_lock);
+ if (list_empty(&xprt->sc_maps))
+ goto out_empty;
+
+ map = list_first_entry(&xprt->sc_maps,
+ struct svc_rdma_req_map, free);
+ list_del_init(&map->free);
+ spin_unlock(&xprt->sc_map_lock);
+
+out:
map->count = 0;
return map;
+
+out_empty:
+ spin_unlock(&xprt->sc_map_lock);
+
+ /* Pre-allocation amount was incorrect */
+ map = alloc_req_map(GFP_NOIO);
+ if (map)
+ goto out;
+
+ WARN_ONCE(1, "svcrdma: empty request map list?\n");
+ return NULL;
+}
+
+void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
+ struct svc_rdma_req_map *map)
+{
+ spin_lock(&xprt->sc_map_lock);
+ list_add(&map->free, &xprt->sc_maps);
+ spin_unlock(&xprt->sc_map_lock);
}
-void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
{
- kmem_cache_free(svc_rdma_map_cachep, map);
+ while (!list_empty(&xprt->sc_maps)) {
+ struct svc_rdma_req_map *map;
+
+ map = list_first_entry(&xprt->sc_maps,
+ struct svc_rdma_req_map, free);
+ list_del(&map->free);
+ kfree(map);
+ }
}
/* ib_cq event handler */
@@ -386,46 +517,44 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
static void process_context(struct svcxprt_rdma *xprt,
struct svc_rdma_op_ctxt *ctxt)
{
+ struct svc_rdma_op_ctxt *read_hdr;
+ int free_pages = 0;
+
svc_rdma_unmap_dma(ctxt);
switch (ctxt->wr_op) {
case IB_WR_SEND:
- if (ctxt->frmr)
- pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
- svc_rdma_put_context(ctxt, 1);
+ free_pages = 1;
break;
case IB_WR_RDMA_WRITE:
- if (ctxt->frmr)
- pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
- svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
svc_rdma_put_frmr(xprt, ctxt->frmr);
- if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
- struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
- if (read_hdr) {
- spin_lock_bh(&xprt->sc_rq_dto_lock);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- list_add_tail(&read_hdr->dto_q,
- &xprt->sc_read_complete_q);
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
- } else {
- pr_err("svcrdma: ctxt->read_hdr == NULL\n");
- }
- svc_xprt_enqueue(&xprt->sc_xprt);
- }
+
+ if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
+ break;
+
+ read_hdr = ctxt->read_hdr;
svc_rdma_put_context(ctxt, 0);
- break;
+
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ list_add_tail(&read_hdr->dto_q,
+ &xprt->sc_read_complete_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ svc_xprt_enqueue(&xprt->sc_xprt);
+ return;
default:
- printk(KERN_ERR "svcrdma: unexpected completion type, "
- "opcode=%d\n",
- ctxt->wr_op);
+ dprintk("svcrdma: unexpected completion opcode=%d\n",
+ ctxt->wr_op);
break;
}
+
+ svc_rdma_put_context(ctxt, free_pages);
}
/*
@@ -523,19 +652,15 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
+ INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+ INIT_LIST_HEAD(&cma_xprt->sc_maps);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
spin_lock_init(&cma_xprt->sc_frmr_q_lock);
-
- cma_xprt->sc_ord = svcrdma_ord;
-
- cma_xprt->sc_max_req_size = svcrdma_max_req_size;
- cma_xprt->sc_max_requests = svcrdma_max_requests;
- cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
- atomic_set(&cma_xprt->sc_sq_count, 0);
- atomic_set(&cma_xprt->sc_ctxt_used, 0);
+ spin_lock_init(&cma_xprt->sc_ctxt_lock);
+ spin_lock_init(&cma_xprt->sc_map_lock);
if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -543,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return cma_xprt;
}
-int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
+int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
{
struct ib_recv_wr recv_wr, *bad_recv_wr;
struct svc_rdma_op_ctxt *ctxt;
@@ -561,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err_put_ctxt;
}
- page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ page = alloc_page(flags);
+ if (!page)
+ goto err_put_ctxt;
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
@@ -571,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
atomic_inc(&xprt->sc_dma_used);
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
- ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count = sge_no + 1;
buflen += PAGE_SIZE;
}
@@ -886,11 +1013,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rdma_conn_param conn_param;
struct ib_cq_init_attr cq_attr = {};
struct ib_qp_init_attr qp_attr;
- struct ib_device_attr devattr;
- int uninitialized_var(dma_mr_acc);
- int need_dma_mr = 0;
- int ret;
- int i;
+ struct ib_device *dev;
+ unsigned int i;
+ int ret = 0;
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -910,37 +1035,42 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
newxprt, newxprt->sc_cm_id);
- ret = ib_query_device(newxprt->sc_cm_id->device, &devattr);
- if (ret) {
- dprintk("svcrdma: could not query device attributes on "
- "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
- goto errout;
- }
+ dev = newxprt->sc_cm_id->device;
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
- newxprt->sc_max_sge = min((size_t)devattr.max_sge,
+ newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
(size_t)RPCSVC_MAXPAGES);
- newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd,
+ newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
RPCSVC_MAXPAGES);
- newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
- (size_t)svcrdma_max_requests);
- newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
+ newxprt->sc_max_req_size = svcrdma_max_req_size;
+ newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
+ svcrdma_max_requests);
+ newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
+ svcrdma_max_bc_requests);
+ newxprt->sc_rq_depth = newxprt->sc_max_requests +
+ newxprt->sc_max_bc_requests;
+ newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+
+ if (!svc_rdma_prealloc_ctxts(newxprt))
+ goto errout;
+ if (!svc_rdma_prealloc_maps(newxprt))
+ goto errout;
/*
* Limit ORD based on client limit, local device limit, and
* configured svcrdma limit.
*/
- newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+ newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
- newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
+ newxprt->sc_pd = ib_alloc_pd(dev);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
goto errout;
}
cq_attr.cqe = newxprt->sc_sq_depth;
- newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+ newxprt->sc_sq_cq = ib_create_cq(dev,
sq_comp_handler,
cq_event_handler,
newxprt,
@@ -949,8 +1079,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout;
}
- cq_attr.cqe = newxprt->sc_max_requests;
- newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+ cq_attr.cqe = newxprt->sc_rq_depth;
+ newxprt->sc_rq_cq = ib_create_cq(dev,
rq_comp_handler,
cq_event_handler,
newxprt,
@@ -964,7 +1094,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = &newxprt->sc_xprt;
qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
- qp_attr.cap.max_recv_wr = newxprt->sc_max_requests;
+ qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -978,7 +1108,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
" cap.max_send_sge = %d\n"
" cap.max_recv_sge = %d\n",
newxprt->sc_cm_id, newxprt->sc_pd,
- newxprt->sc_cm_id->device, newxprt->sc_pd->device,
+ dev, newxprt->sc_pd->device,
qp_attr.cap.max_send_wr,
qp_attr.cap.max_recv_wr,
qp_attr.cap.max_send_sge,
@@ -1014,9 +1144,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
* of an RDMA_READ. IB does not.
*/
newxprt->sc_reader = rdma_read_chunk_lcl;
- if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
newxprt->sc_frmr_pg_list_len =
- devattr.max_fast_reg_page_list_len;
+ dev->attrs.max_fast_reg_page_list_len;
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
newxprt->sc_reader = rdma_read_chunk_frmr;
}
@@ -1024,44 +1154,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/*
* Determine if a DMA MR is required and if so, what privs are required
*/
- if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num) &&
- !rdma_ib_or_roce(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num))
+ if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
+ !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
goto errout;
- if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
- !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
- need_dma_mr = 1;
- dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
- if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num) &&
- !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
- dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
- }
-
- if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num))
+ if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
- /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
- if (need_dma_mr) {
- /* Register all of physical memory */
- newxprt->sc_phys_mr =
- ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
- if (IS_ERR(newxprt->sc_phys_mr)) {
- dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
- ret);
- goto errout;
- }
- newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
- } else
- newxprt->sc_dma_lkey =
- newxprt->sc_cm_id->device->local_dma_lkey;
-
/* Post receive buffers */
- for (i = 0; i < newxprt->sc_max_requests; i++) {
- ret = svc_rdma_post_recv(newxprt);
+ for (i = 0; i < newxprt->sc_rq_depth; i++) {
+ ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
if (ret) {
dprintk("svcrdma: failure posting receive buffers\n");
goto errout;
@@ -1160,12 +1262,14 @@ static void __svc_rdma_free(struct work_struct *work)
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
- dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+ struct svc_xprt *xprt = &rdma->sc_xprt;
+
+ dprintk("svcrdma: %s(%p)\n", __func__, rdma);
/* We should only be called from kref_put */
- if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+ if (atomic_read(&xprt->xpt_ref.refcount) != 0)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",
- atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
+ atomic_read(&xprt->xpt_ref.refcount));
/*
* Destroy queued, but not processed read completions. Note
@@ -1193,15 +1297,22 @@ static void __svc_rdma_free(struct work_struct *work)
}
/* Warn if we leaked a resource or under-referenced */
- if (atomic_read(&rdma->sc_ctxt_used) != 0)
+ if (rdma->sc_ctxt_used != 0)
pr_err("svcrdma: ctxt still in use? (%d)\n",
- atomic_read(&rdma->sc_ctxt_used));
+ rdma->sc_ctxt_used);
if (atomic_read(&rdma->sc_dma_used) != 0)
pr_err("svcrdma: dma still in use? (%d)\n",
atomic_read(&rdma->sc_dma_used));
- /* De-allocate fastreg mr */
+ /* Final put of backchannel client transport */
+ if (xprt->xpt_bc_xprt) {
+ xprt_put(xprt->xpt_bc_xprt);
+ xprt->xpt_bc_xprt = NULL;
+ }
+
rdma_dealloc_frmr_q(rdma);
+ svc_rdma_destroy_ctxts(rdma);
+ svc_rdma_destroy_maps(rdma);
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1213,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work)
if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
ib_destroy_cq(rdma->sc_rq_cq);
- if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
- ib_dereg_mr(rdma->sc_phys_mr);
-
if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
ib_dealloc_pd(rdma->sc_pd);
@@ -1321,7 +1429,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
int length;
int ret;
- p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ p = alloc_page(GFP_KERNEL);
+ if (!p)
+ return;
va = page_address(p);
/* XDR encode error */
@@ -1341,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
return;
}
atomic_inc(&xprt->sc_dma_used);
- ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[0].length = length;
/* Prepare SEND WR */
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8c545f7d7525..b1b009f10ea3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -63,7 +63,7 @@
*/
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
-static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
@@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = {
#endif
-#define RPCRDMA_BIND_TO (60U * HZ)
-#define RPCRDMA_INIT_REEST_TO (5U * HZ)
-#define RPCRDMA_MAX_REEST_TO (30U * HZ)
-#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
-
-static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
+static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
static void
xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
}
-static void
+void
xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
{
char buf[128];
@@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
}
-static void
+void
xprt_rdma_free_addresses(struct rpc_xprt *xprt)
{
unsigned int i;
@@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
if (req == NULL)
return NULL;
- flags = GFP_NOIO | __GFP_NOWARN;
+ flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -576,6 +571,9 @@ xprt_rdma_free(void *buffer)
rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
req = rb->rg_owner;
+ if (req->rl_backchannel)
+ return;
+
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
@@ -639,7 +637,7 @@ drop_connection:
return -ENOTCONN; /* implies disconnect */
}
-static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
long idle_time = 0;
@@ -740,6 +738,11 @@ void xprt_rdma_cleanup(void)
rpcrdma_destroy_wq();
frwr_destroy_recovery_wq();
+
+ rc = xprt_unregister_transport(&xprt_rdma_bc);
+ if (rc)
+ dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
+ __func__, rc);
}
int xprt_rdma_init(void)
@@ -763,6 +766,14 @@ int xprt_rdma_init(void)
return rc;
}
+ rc = xprt_register_transport(&xprt_rdma_bc);
+ if (rc) {
+ xprt_unregister_transport(&xprt_rdma);
+ rpcrdma_destroy_wq();
+ frwr_destroy_recovery_wq();
+ return rc;
+ }
+
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
dprintk("Defaults:\n");
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index eadd1655145a..878f1bfb1db9 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -462,7 +462,6 @@ int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
- struct ib_device_attr *devattr = &ia->ri_devattr;
int rc;
ia->ri_dma_mr = NULL;
@@ -482,16 +481,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
goto out2;
}
- rc = ib_query_device(ia->ri_device, devattr);
- if (rc) {
- dprintk("RPC: %s: ib_query_device failed %d\n",
- __func__, rc);
- goto out3;
- }
-
if (memreg == RPCRDMA_FRMR) {
- if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
- (devattr->max_fast_reg_page_list_len == 0)) {
+ if (!(ia->ri_device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS) ||
+ (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) {
dprintk("RPC: %s: FRMR registration "
"not supported by HCA\n", __func__);
memreg = RPCRDMA_MTHCAFMR;
@@ -566,24 +559,23 @@ int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr *devattr = &ia->ri_devattr;
struct ib_cq *sendcq, *recvcq;
struct ib_cq_init_attr cq_attr = {};
unsigned int max_qp_wr;
int rc, err;
- if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
+ if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
dprintk("RPC: %s: insufficient sge's available\n",
__func__);
return -ENOMEM;
}
- if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
+ if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
dprintk("RPC: %s: insufficient wqe's available\n",
__func__);
return -ENOMEM;
}
- max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS;
+ max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS;
/* check provider's send/recv wr limits */
if (cdata->max_requests > max_qp_wr)
@@ -616,10 +608,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* set trigger for requesting send completion */
ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
- if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
- ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
- else if (ep->rep_cqinit <= 2)
- ep->rep_cqinit = 0;
+ if (ep->rep_cqinit <= 2)
+ ep->rep_cqinit = 0; /* always signal? */
INIT_CQCOUNT(ep);
init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
@@ -670,11 +660,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
ep->rep_remote_cma.responder_resources = 32;
else
ep->rep_remote_cma.responder_resources =
- devattr->max_qp_rd_atom;
+ ia->ri_device->attrs.max_qp_rd_atom;
ep->rep_remote_cma.retry_count = 7;
ep->rep_remote_cma.flow_control = 0;
@@ -852,10 +842,11 @@ retry:
if (extras) {
rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
- if (rc)
+ if (rc) {
pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
__func__, rc);
rc = 0;
+ }
}
}
@@ -1337,15 +1328,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_rep *rep;
- unsigned long flags;
int rc;
while (count--) {
- spin_lock_irqsave(&buffers->rb_lock, flags);
+ spin_lock(&buffers->rb_lock);
if (list_empty(&buffers->rb_recv_bufs))
goto out_reqbuf;
rep = rpcrdma_buffer_get_rep_locked(buffers);
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ spin_unlock(&buffers->rb_lock);
rc = rpcrdma_ep_post_recv(ia, ep, rep);
if (rc)
@@ -1355,7 +1345,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
return 0;
out_reqbuf:
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ spin_unlock(&buffers->rb_lock);
pr_warn("%s: no extra receive buffers\n", __func__);
return -ENOMEM;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7f8d4f632a..38fe11b09875 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -55,6 +55,11 @@
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
+#define RPCRDMA_BIND_TO (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
+
/*
* Interface Adapter -- one per transport instance
*/
@@ -68,7 +73,6 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
unsigned int ri_max_frmr_depth;
- struct ib_device_attr ri_devattr;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
@@ -88,12 +92,6 @@ struct rpcrdma_ep {
struct delayed_work rep_connect_worker;
};
-/*
- * Force a signaled SEND Work Request every so often,
- * in case the provider needs to do some housekeeping.
- */
-#define RPCRDMA_MAX_UNSIGNALED_SENDS (32)
-
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
@@ -148,6 +146,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
return (struct rpcrdma_msg *)rb->rg_base;
}
+#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
+
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asychronously. It needs several pieces of
@@ -207,6 +207,12 @@ struct rpcrdma_frmr {
enum rpcrdma_frmr_state fr_state;
struct work_struct fr_work;
struct rpcrdma_xprt *fr_xprt;
+ bool fr_waiter;
+ struct completion fr_linv_done;;
+ union {
+ struct ib_reg_wr fr_regwr;
+ struct ib_send_wr fr_invwr;
+ };
};
struct rpcrdma_fmr {
@@ -309,6 +315,8 @@ struct rpcrdma_buffer {
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
struct list_head rb_allreqs;
+
+ u32 rb_bc_max_requests;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
@@ -364,6 +372,8 @@ struct rpcrdma_xprt;
struct rpcrdma_memreg_ops {
int (*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool);
+ void (*ro_unmap_sync)(struct rpcrdma_xprt *,
+ struct rpcrdma_req *);
int (*ro_unmap)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *);
int (*ro_open)(struct rpcrdma_ia *,
@@ -514,6 +524,10 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
/* RPC/RDMA module init - xprtrdma/transport.c
*/
+extern unsigned int xprt_rdma_max_inline_read;
+void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
+void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
@@ -529,11 +543,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-/* Temporary NFS request map cache. Created in svc_rdma.c */
-extern struct kmem_cache *svc_rdma_map_cachep;
-/* WR context cache. Created in svc_rdma.c */
-extern struct kmem_cache *svc_rdma_ctxt_cachep;
-/* Workqueue created in svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
+extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2ffaf6a79499..fde2138b81e7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (unlikely(!sock))
return -ENOTSOCK;
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
if (base != 0) {
addr = NULL;
addrlen = 0;
@@ -442,7 +441,6 @@ static void xs_nospace_callback(struct rpc_task *task)
struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
transport->inet->sk_write_pending--;
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
}
/**
@@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
- if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) {
- /*
- * Notify TCP that we're limited by the application
- * window size
- */
- set_bit(SOCK_NOSPACE, &transport->sock->flags);
- sk->sk_write_pending++;
- /* ...and wait for more buffer space */
- xprt_wait_for_buffer_space(task, xs_nospace_callback);
- }
- } else {
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+ /* wait for more buffer space */
+ sk->sk_write_pending++;
+ xprt_wait_for_buffer_space(task, xs_nospace_callback);
+ } else
ret = -ENOTCONN;
- }
spin_unlock_bh(&xprt->transport_lock);
@@ -616,9 +605,6 @@ process_status:
case -EAGAIN:
status = xs_nospace(task);
break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
case -ENETUNREACH:
case -ENOBUFS:
case -EPIPE:
@@ -626,7 +612,10 @@ process_status:
case -EPERM:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+ break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
}
return status;
@@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -EAGAIN:
status = xs_nospace(task);
break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
case -ECONNRESET:
case -ECONNREFUSED:
case -ENOTCONN:
case -EADDRINUSE:
case -ENOBUFS:
case -EPIPE:
- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+ break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
}
return status;
@@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk)
static void xs_write_space(struct sock *sk)
{
- struct socket *sock;
+ struct socket_wq *wq;
struct rpc_xprt *xprt;
- if (unlikely(!(sock = sk->sk_socket)))
+ if (!sk->sk_socket)
return;
- clear_bit(SOCK_NOSPACE, &sock->flags);
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
- if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0)
- return;
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
+ goto out;
xprt_write_space(xprt);
+out:
+ rcu_read_unlock();
}
/**
@@ -1907,18 +1900,6 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
}
}
#else
-static inline void xs_reclassify_socketu(struct socket *sock)
-{
-}
-
-static inline void xs_reclassify_socket4(struct socket *sock)
-{
-}
-
-static inline void xs_reclassify_socket6(struct socket *sock)
-{
-}
-
static inline void xs_reclassify_socket(int family, struct socket *sock)
{
}
@@ -2008,7 +1989,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
"transport socket (%d).\n", -status);
goto out;
}
- xs_reclassify_socketu(sock);
+ xs_reclassify_socket(AF_LOCAL, sock);
dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index ebc661d3b6e3..47f7da58a7f0 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -20,6 +20,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
+#include <linux/rtnetlink.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
@@ -567,7 +568,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
/**
@@ -582,9 +582,9 @@ int register_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&switchdev_mutex);
+ rtnl_lock();
err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
- mutex_unlock(&switchdev_mutex);
+ rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -600,9 +600,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&switchdev_mutex);
+ rtnl_lock();
err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
- mutex_unlock(&switchdev_mutex);
+ rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -616,16 +616,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
* Call all network notifier blocks. This should be called by driver
* when it needs to propagate hardware event.
* Return values are same as for atomic_notifier_call_chain().
+ * rtnl_lock must be held.
*/
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info)
{
int err;
+ ASSERT_RTNL();
+
info->dev = dev;
- mutex_lock(&switchdev_mutex);
err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
- mutex_unlock(&switchdev_mutex);
return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 0c2944fb9ae0..e31d92f80572 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -123,7 +123,6 @@ struct tipc_stats {
struct tipc_link {
u32 addr;
char name[TIPC_MAX_LINK_NAME];
- struct tipc_media_addr *media_addr;
struct net *net;
/* Management and link supervision data */
@@ -904,8 +903,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
return link_schedule_user(l, list);
}
- if (unlikely(msg_size(hdr) > mtu))
+ if (unlikely(msg_size(hdr) > mtu)) {
+ skb_queue_purge(list);
return -EMSGSIZE;
+ }
/* Prepare each packet for sending, and add to relevant queue: */
while (skb_queue_len(list)) {
@@ -917,8 +918,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
if (likely(skb_queue_len(transmq) < maxwin)) {
_skb = skb_clone(skb, GFP_ATOMIC);
- if (!_skb)
+ if (!_skb) {
+ skb_queue_purge(list);
return -ENOBUFS;
+ }
__skb_dequeue(list);
__skb_queue_tail(transmq, skb);
__skb_queue_tail(xmitq, _skb);
@@ -1261,26 +1264,6 @@ drop:
return rc;
}
-/*
- * Send protocol message to the other endpoint.
- */
-static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ,
- int probe_msg, u32 gap, u32 tolerance,
- u32 priority)
-{
- struct sk_buff *skb = NULL;
- struct sk_buff_head xmitq;
-
- __skb_queue_head_init(&xmitq);
- tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap,
- tolerance, priority, &xmitq);
- skb = __skb_dequeue(&xmitq);
- if (!skb)
- return;
- tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr);
- l->rcv_unacked = 0;
-}
-
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
u16 rcvgap, int tolerance, int priority,
struct sk_buff_head *xmitq)
@@ -1479,6 +1462,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
l->tolerance = peers_tol;
+ if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI,
+ TIPC_MAX_LINK_PRI)) {
+ l->priority = peers_prio;
+ rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }
+
l->silent_intv_cnt = 0;
l->stats.recv_states++;
if (msg_probe(hdr))
@@ -1973,8 +1962,10 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
NLM_F_MULTI, TIPC_NL_LINK_GET);
- if (!hdr)
+ if (!hdr) {
+ tipc_bcast_unlock(net);
return -EMSGSIZE;
+ }
attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
if (!attrs)
@@ -2021,16 +2012,18 @@ msg_full:
return -EMSGSIZE;
}
-void tipc_link_set_tolerance(struct tipc_link *l, u32 tol)
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
+ struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq);
}
-void tipc_link_set_prio(struct tipc_link *l, u32 prio)
+void tipc_link_set_prio(struct tipc_link *l, u32 prio,
+ struct sk_buff_head *xmitq)
{
l->priority = prio;
- tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq);
}
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/link.h b/net/tipc/link.h
index b2ae0f4276af..b4ee9d6e181d 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -112,8 +112,10 @@ char tipc_link_plane(struct tipc_link *l);
int tipc_link_prio(struct tipc_link *l);
int tipc_link_window(struct tipc_link *l);
unsigned long tipc_link_tolerance(struct tipc_link *l);
-void tipc_link_set_tolerance(struct tipc_link *l, u32 tol);
-void tipc_link_set_prio(struct tipc_link *l, u32 prio);
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
+ struct sk_buff_head *xmitq);
+void tipc_link_set_prio(struct tipc_link *l, u32 prio,
+ struct sk_buff_head *xmitq);
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit);
void tipc_link_set_queue_limits(struct tipc_link *l, u32 window);
int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 91fce70291a8..777b979b8463 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -418,6 +418,9 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
struct tipc_subscription *s)
{
struct sub_seq *sseq = nseq->sseqs;
+ struct tipc_name_seq ns;
+
+ tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
list_add(&s->nameseq_list, &nseq->subscriptions);
@@ -425,7 +428,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
return;
while (sseq != &nseq->sseqs[nseq->first_free]) {
- if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) {
+ if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
struct publication *crs;
struct name_info *info = sseq->info;
int must_report = 1;
@@ -722,9 +725,10 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
void tipc_nametbl_subscribe(struct tipc_subscription *s)
{
struct tipc_net *tn = net_generic(s->net, tipc_net_id);
- u32 type = s->seq.type;
+ u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
int index = hash(type);
struct name_seq *seq;
+ struct tipc_name_seq ns;
spin_lock_bh(&tn->nametbl_lock);
seq = nametbl_find_seq(s->net, type);
@@ -735,8 +739,9 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
tipc_nameseq_subscribe(seq, s);
spin_unlock_bh(&seq->lock);
} else {
+ tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
pr_warn("Failed to create subscription for {%u,%u,%u}\n",
- s->seq.type, s->seq.lower, s->seq.upper);
+ ns.type, ns.lower, ns.upper);
}
spin_unlock_bh(&tn->nametbl_lock);
}
@@ -748,9 +753,10 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
{
struct tipc_net *tn = net_generic(s->net, tipc_net_id);
struct name_seq *seq;
+ u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, s->seq.type);
+ seq = nametbl_find_seq(s->net, type);
if (seq != NULL) {
spin_lock_bh(&seq->lock);
list_del_init(&s->nameseq_list);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 2c016fdefe97..d7d050f44fc1 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1104,8 +1104,8 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
req_nlh = (struct nlmsghdr *)skb->data;
msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
msg.cmd = req_userhdr->cmd;
- msg.dst_sk = info->dst_sk;
msg.net = genl_info_net(info);
+ msg.dst_sk = skb->sk;
if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index fa97d9649a28..cdb79503d890 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -225,9 +225,10 @@ static unsigned int tipc_hashfn(u32 addr)
static void tipc_node_kref_release(struct kref *kref)
{
- struct tipc_node *node = container_of(kref, struct tipc_node, kref);
+ struct tipc_node *n = container_of(kref, struct tipc_node, kref);
- tipc_node_delete(node);
+ kfree(n->bc_entry.link);
+ kfree_rcu(n, rcu);
}
static void tipc_node_put(struct tipc_node *node)
@@ -245,23 +246,23 @@ static void tipc_node_get(struct tipc_node *node)
*/
static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
struct tipc_node *node;
+ unsigned int thash = tipc_hashfn(addr);
if (unlikely(!in_own_cluster_exact(net, addr)))
return NULL;
rcu_read_lock();
- hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)],
- hash) {
- if (node->addr == addr) {
- tipc_node_get(node);
- rcu_read_unlock();
- return node;
- }
+ hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
+ if (node->addr != addr)
+ continue;
+ if (!kref_get_unless_zero(&node->kref))
+ node = NULL;
+ break;
}
rcu_read_unlock();
- return NULL;
+ return node;
}
static void tipc_node_read_lock(struct tipc_node *n)
@@ -346,12 +347,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
skb_queue_head_init(&n->bc_entry.inputq2);
for (i = 0; i < MAX_BEARERS; i++)
spin_lock_init(&n->links[i].lock);
- hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
- list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
- if (n->addr < temp_node->addr)
- break;
- }
- list_add_tail_rcu(&n->list, &temp_node->list);
n->state = SELF_DOWN_PEER_LEAVING;
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
@@ -372,6 +367,12 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
tipc_node_get(n);
setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
n->keepalive_intv = U32_MAX;
+ hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ if (n->addr < temp_node->addr)
+ break;
+ }
+ list_add_tail_rcu(&n->list, &temp_node->list);
exit:
spin_unlock_bh(&tn->node_list_lock);
return n;
@@ -395,21 +396,20 @@ static void tipc_node_delete(struct tipc_node *node)
{
list_del_rcu(&node->list);
hlist_del_rcu(&node->hash);
- kfree(node->bc_entry.link);
- kfree_rcu(node, rcu);
+ tipc_node_put(node);
+
+ del_timer_sync(&node->timer);
+ tipc_node_put(node);
}
void tipc_node_stop(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
struct tipc_node *node, *t_node;
spin_lock_bh(&tn->node_list_lock);
- list_for_each_entry_safe(node, t_node, &tn->node_list, list) {
- if (del_timer(&node->timer))
- tipc_node_put(node);
- tipc_node_put(node);
- }
+ list_for_each_entry_safe(node, t_node, &tn->node_list, list)
+ tipc_node_delete(node);
spin_unlock_bh(&tn->node_list_lock);
}
@@ -530,9 +530,7 @@ static void tipc_node_timeout(unsigned long data)
if (rc & TIPC_LINK_DOWN_EVT)
tipc_node_link_down(n, bearer_id, false);
}
- if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
- tipc_node_get(n);
- tipc_node_put(n);
+ mod_timer(&n->timer, jiffies + n->keepalive_intv);
}
/**
@@ -1166,7 +1164,7 @@ msg_full:
* @dnode: address of destination node
* @selector: a number used for deterministic link selection
* Consumes the buffer chain, except when returning -ELINKCONG
- * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
+ * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF
*/
int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
u32 dnode, int selector)
@@ -1174,33 +1172,43 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
struct tipc_link_entry *le = NULL;
struct tipc_node *n;
struct sk_buff_head xmitq;
- int bearer_id = -1;
- int rc = -EHOSTUNREACH;
+ int bearer_id;
+ int rc;
+
+ if (in_own_node(net, dnode)) {
+ tipc_sk_rcv(net, list);
+ return 0;
+ }
- __skb_queue_head_init(&xmitq);
n = tipc_node_find(net, dnode);
- if (likely(n)) {
- tipc_node_read_lock(n);
- bearer_id = n->active_links[selector & 1];
- if (bearer_id >= 0) {
- le = &n->links[bearer_id];
- spin_lock_bh(&le->lock);
- rc = tipc_link_xmit(le->link, list, &xmitq);
- spin_unlock_bh(&le->lock);
- }
+ if (unlikely(!n)) {
+ skb_queue_purge(list);
+ return -EHOSTUNREACH;
+ }
+
+ tipc_node_read_lock(n);
+ bearer_id = n->active_links[selector & 1];
+ if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
- if (likely(!rc))
- tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
- else if (rc == -ENOBUFS)
- tipc_node_link_down(n, bearer_id, false);
tipc_node_put(n);
- return rc;
+ skb_queue_purge(list);
+ return -EHOSTUNREACH;
}
- if (likely(in_own_node(net, dnode))) {
- tipc_sk_rcv(net, list);
- return 0;
- }
+ __skb_queue_head_init(&xmitq);
+ le = &n->links[bearer_id];
+ spin_lock_bh(&le->lock);
+ rc = tipc_link_xmit(le->link, list, &xmitq);
+ spin_unlock_bh(&le->lock);
+ tipc_node_read_unlock(n);
+
+ if (likely(rc == 0))
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ else if (rc == -ENOBUFS)
+ tipc_node_link_down(n, bearer_id, false);
+
+ tipc_node_put(n);
+
return rc;
}
@@ -1637,9 +1645,12 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
char *name;
struct tipc_link *link;
struct tipc_node *node;
+ struct sk_buff_head xmitq;
struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
struct net *net = sock_net(skb->sk);
+ __skb_queue_head_init(&xmitq);
+
if (!info->attrs[TIPC_NLA_LINK])
return -EINVAL;
@@ -1683,13 +1694,13 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
u32 tol;
tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
- tipc_link_set_tolerance(link, tol);
+ tipc_link_set_tolerance(link, tol, &xmitq);
}
if (props[TIPC_NLA_PROP_PRIO]) {
u32 prio;
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
- tipc_link_set_prio(link, prio);
+ tipc_link_set_prio(link, prio, &xmitq);
}
if (props[TIPC_NLA_PROP_WIN]) {
u32 win;
@@ -1701,7 +1712,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
out:
tipc_node_read_unlock(node);
-
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr);
return res;
}
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 922e04a43396..2446bfbaa309 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -571,13 +571,13 @@ static void tipc_work_stop(struct tipc_server *s)
static int tipc_work_start(struct tipc_server *s)
{
- s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1);
+ s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
if (!s->rcv_wq) {
pr_err("can't start tipc receive workqueue\n");
return -ENOMEM;
}
- s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1);
+ s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
if (!s->send_wq) {
pr_err("can't start tipc send workqueue\n");
destroy_workqueue(s->rcv_wq);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 350cca33ee0a..22963cafd5ed 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -92,25 +92,42 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub,
*
* Returns 1 if there is overlap, otherwise 0.
*/
-int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower,
+int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
u32 found_upper)
{
- if (found_lower < sub->seq.lower)
- found_lower = sub->seq.lower;
- if (found_upper > sub->seq.upper)
- found_upper = sub->seq.upper;
+ if (found_lower < seq->lower)
+ found_lower = seq->lower;
+ if (found_upper > seq->upper)
+ found_upper = seq->upper;
if (found_lower > found_upper)
return 0;
return 1;
}
+u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
+{
+ return htohl(type, swap);
+}
+
+void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
+ struct tipc_name_seq *out)
+{
+ out->type = htohl(in->type, swap);
+ out->lower = htohl(in->lower, swap);
+ out->upper = htohl(in->upper, swap);
+}
+
void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
u32 found_upper, u32 event, u32 port_ref,
u32 node, int must)
{
- if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper))
+ struct tipc_name_seq seq;
+
+ tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
+ if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
- if (!must && !(sub->filter & TIPC_SUB_PORTS))
+ if (!must &&
+ !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS))
return;
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
@@ -171,12 +188,14 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
{
struct tipc_subscription *sub, *temp;
+ u32 timeout;
spin_lock_bh(&subscriber->lock);
/* Destroy any existing subscriptions for subscriber */
list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
subscrp_list) {
- if (del_timer(&sub->timer)) {
+ timeout = htohl(sub->evt.s.timeout, sub->swap);
+ if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) {
tipc_subscrp_delete(sub);
tipc_subscrb_put(subscriber);
}
@@ -200,13 +219,16 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s,
struct tipc_subscriber *subscriber)
{
struct tipc_subscription *sub, *temp;
+ u32 timeout;
spin_lock_bh(&subscriber->lock);
/* Find first matching subscription, exit if not found */
list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
subscrp_list) {
if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
- if (del_timer(&sub->timer)) {
+ timeout = htohl(sub->evt.s.timeout, sub->swap);
+ if ((timeout == TIPC_WAIT_FOREVER) ||
+ del_timer(&sub->timer)) {
tipc_subscrp_delete(sub);
tipc_subscrb_put(subscriber);
}
@@ -216,66 +238,67 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s,
spin_unlock_bh(&subscriber->lock);
}
-static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s,
- struct tipc_subscriber *subscriber,
- struct tipc_subscription **sub_p)
+static struct tipc_subscription *tipc_subscrp_create(struct net *net,
+ struct tipc_subscr *s,
+ int swap)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_subscription *sub;
- int swap;
-
- /* Determine subscriber's endianness */
- swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
-
- /* Detect & process a subscription cancellation request */
- if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
- s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
- tipc_subscrp_cancel(s, subscriber);
- return 0;
- }
+ u32 filter = htohl(s->filter, swap);
/* Refuse subscription if global limit exceeded */
if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
pr_warn("Subscription rejected, limit reached (%u)\n",
TIPC_MAX_SUBSCRIPTIONS);
- return -EINVAL;
+ return NULL;
}
/* Allocate subscription object */
sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
if (!sub) {
pr_warn("Subscription rejected, no memory\n");
- return -ENOMEM;
+ return NULL;
}
/* Initialize subscription object */
sub->net = net;
- sub->seq.type = htohl(s->seq.type, swap);
- sub->seq.lower = htohl(s->seq.lower, swap);
- sub->seq.upper = htohl(s->seq.upper, swap);
- sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap));
- sub->filter = htohl(s->filter, swap);
- if ((!(sub->filter & TIPC_SUB_PORTS) ==
- !(sub->filter & TIPC_SUB_SERVICE)) ||
- (sub->seq.lower > sub->seq.upper)) {
+ if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
+ (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
pr_warn("Subscription rejected, illegal request\n");
kfree(sub);
- return -EINVAL;
+ return NULL;
}
- spin_lock_bh(&subscriber->lock);
- list_add(&sub->subscrp_list, &subscriber->subscrp_list);
- spin_unlock_bh(&subscriber->lock);
- sub->subscriber = subscriber;
+
sub->swap = swap;
memcpy(&sub->evt.s, s, sizeof(*s));
atomic_inc(&tn->subscription_count);
+ return sub;
+}
+
+static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
+ struct tipc_subscriber *subscriber, int swap)
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_subscription *sub = NULL;
+ u32 timeout;
+
+ sub = tipc_subscrp_create(net, s, swap);
+ if (!sub)
+ return tipc_conn_terminate(tn->topsrv, subscriber->conid);
+
+ spin_lock_bh(&subscriber->lock);
+ list_add(&sub->subscrp_list, &subscriber->subscrp_list);
+ tipc_subscrb_get(subscriber);
+ sub->subscriber = subscriber;
+ tipc_nametbl_subscribe(sub);
+ spin_unlock_bh(&subscriber->lock);
+
+ timeout = htohl(sub->evt.s.timeout, swap);
+ if (timeout == TIPC_WAIT_FOREVER)
+ return;
+
setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
- if (sub->timeout != TIPC_WAIT_FOREVER)
- sub->timeout += jiffies;
- if (!mod_timer(&sub->timer, sub->timeout))
- tipc_subscrb_get(subscriber);
- *sub_p = sub;
- return 0;
+ mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
}
/* Handle one termination request for the subscriber */
@@ -290,14 +313,20 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
void *buf, size_t len)
{
struct tipc_subscriber *subscriber = usr_data;
- struct tipc_subscription *sub = NULL;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_subscr *s = (struct tipc_subscr *)buf;
+ int swap;
+
+ /* Determine subscriber's endianness */
+ swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
+ TIPC_SUB_CANCEL));
+
+ /* Detect & process a subscription cancellation request */
+ if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
+ s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
+ return tipc_subscrp_cancel(s, subscriber);
+ }
- tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
- if (sub)
- tipc_nametbl_subscribe(sub);
- else
- tipc_conn_terminate(tn->topsrv, subscriber->conid);
+ tipc_subscrp_subscribe(net, s, subscriber, swap);
}
/* Handle one request to establish a new subscriber */
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 92ee18cc5fe6..be60103082c9 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -50,21 +50,15 @@ struct tipc_subscriber;
* @subscriber: pointer to its subscriber
* @seq: name sequence associated with subscription
* @net: point to network namespace
- * @timeout: duration of subscription (in ms)
- * @filter: event filtering to be done for subscription
* @timer: timer governing subscription duration (optional)
* @nameseq_list: adjacent subscriptions in name sequence's subscription list
* @subscrp_list: adjacent subscriptions in subscriber's subscription list
- * @server_ref: object reference of server port associated with subscription
* @swap: indicates if subscriber uses opposite endianness in its messages
* @evt: template for events generated by subscription
*/
struct tipc_subscription {
struct tipc_subscriber *subscriber;
- struct tipc_name_seq seq;
struct net *net;
- unsigned long timeout;
- u32 filter;
struct timer_list timer;
struct list_head nameseq_list;
struct list_head subscrp_list;
@@ -72,11 +66,14 @@ struct tipc_subscription {
struct tipc_event evt;
};
-int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower,
+int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
u32 found_upper);
void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
u32 found_lower, u32 found_upper, u32 event,
u32 port_ref, u32 node, int must);
+void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
+ struct tipc_name_seq *out);
+u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
int tipc_topsrv_start(struct net *net);
void tipc_topsrv_stop(struct net *net);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c5bf5ef2bf89..8269da73e9e5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
UNIXCB(skb).fp = NULL;
for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->fp[i]);
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}
static void unix_destruct_scm(struct sk_buff *skb)
@@ -1534,7 +1534,6 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
unsigned char max_level = 0;
- int unix_sock_count = 0;
if (too_many_unix_fds(current))
return -ETOOMANYREFS;
@@ -1542,11 +1541,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
for (i = scm->fp->count - 1; i >= 0; i--) {
struct sock *sk = unix_get_socket(scm->fp->fp[i]);
- if (sk) {
- unix_sock_count++;
+ if (sk)
max_level = max(max_level,
unix_sk(sk)->recursion_level);
- }
}
if (unlikely(max_level > MAX_RECURSION_LEVEL))
return -ETOOMANYREFS;
@@ -1561,7 +1558,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
return -ENOMEM;
for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
return max_level;
}
@@ -1781,7 +1778,12 @@ restart_locked:
goto out_unlock;
}
- if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ /* other == sk && unix_peer(other) != sk if
+ * - unix_peer(sk) == NULL, destination address bound to sk
+ * - unix_peer(sk) == sk by time of get but disconnected before lock
+ */
+ if (other != sk &&
+ unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
if (timeo) {
timeo = unix_wait_for_peer(other, timeo);
@@ -2277,13 +2279,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
size_t size = state->size;
unsigned int last_len;
- err = -EINVAL;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+ err = -EINVAL;
goto out;
+ }
- err = -EOPNOTSUPP;
- if (flags & MSG_OOB)
+ if (unlikely(flags & MSG_OOB)) {
+ err = -EOPNOTSUPP;
goto out;
+ }
target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
timeo = sock_rcvtimeo(sk, noblock);
@@ -2305,6 +2309,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
bool drop_skb;
struct sk_buff *skb, *last;
+redo:
unix_state_lock(sk);
if (sock_flag(sk, SOCK_DEAD)) {
err = -ECONNRESET;
@@ -2329,9 +2334,11 @@ again:
goto unlock;
unix_state_unlock(sk);
- err = -EAGAIN;
- if (!timeo)
+ if (!timeo) {
+ err = -EAGAIN;
break;
+ }
+
mutex_unlock(&u->readlock);
timeo = unix_stream_data_wait(sk, timeo, last,
@@ -2339,11 +2346,12 @@ again:
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
+ scm_destroy(&scm);
goto out;
}
mutex_lock(&u->readlock);
- continue;
+ goto redo;
unlock:
unix_state_unlock(sk);
break;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c512f64d5287..4d9679701a6d 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -220,7 +220,7 @@ done:
return skb->len;
}
-static struct sock *unix_lookup_by_ino(int ino)
+static struct sock *unix_lookup_by_ino(unsigned int ino)
{
int i;
struct sock *sk;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 8fcdc2283af5..6a0d48525fcf 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp)
* descriptor if it is for an AF_UNIX socket.
*/
-void unix_inflight(struct file *fp)
+void unix_inflight(struct user_struct *user, struct file *fp)
{
struct sock *s = unix_get_socket(fp);
@@ -133,11 +133,11 @@ void unix_inflight(struct file *fp)
}
unix_tot_inflight++;
}
- fp->f_cred->user->unix_inflight++;
+ user->unix_inflight++;
spin_unlock(&unix_gc_lock);
}
-void unix_notinflight(struct file *fp)
+void unix_notinflight(struct user_struct *user, struct file *fp)
{
struct sock *s = unix_get_socket(fp);
@@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp)
list_del_init(&u->link);
unix_tot_inflight--;
}
- fp->f_cred->user->unix_inflight--;
+ user->unix_inflight--;
spin_unlock(&unix_gc_lock);
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220fbfa0..bbe65dcb9738 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1557,8 +1557,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
while (total_written < len) {
ssize_t written;
@@ -1578,7 +1576,9 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_wait;
release_sock(sk);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(sk), &wait);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
@@ -1588,8 +1588,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_wait;
}
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
/* These checks occur both as part of and after the loop
@@ -1635,7 +1633,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
out_wait:
if (total_written > 0)
err = total_written;
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
@@ -1716,7 +1713,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (1) {
s64 ready = vsock_stream_has_data(vsk);
@@ -1727,7 +1723,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
*/
err = -ENOMEM;
- goto out_wait;
+ goto out;
} else if (ready > 0) {
ssize_t read;
@@ -1750,7 +1746,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
vsk, target, read,
!(flags & MSG_PEEK), &recv_data);
if (err < 0)
- goto out_wait;
+ goto out;
if (read >= target || flags & MSG_PEEK)
break;
@@ -1773,7 +1769,9 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
break;
release_sock(sk);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(sk), &wait);
lock_sock(sk);
if (signal_pending(current)) {
@@ -1783,9 +1781,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
err = -EAGAIN;
break;
}
-
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
}
@@ -1816,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
err = copied;
}
-out_wait:
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bc76b281ed3a..c5fb317eee68 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -222,20 +222,22 @@ static const struct ieee80211_regdomain world_regdom = {
/* IEEE 802.11b/g, channels 1..11 */
REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
/* IEEE 802.11b/g, channels 12..13. */
- REG_RULE(2467-10, 2472+10, 40, 6, 20,
- NL80211_RRF_NO_IR),
+ REG_RULE(2467-10, 2472+10, 20, 6, 20,
+ NL80211_RRF_NO_IR | NL80211_RRF_AUTO_BW),
/* IEEE 802.11 channel 14 - Only JP enables
* this and for 802.11b only */
REG_RULE(2484-10, 2484+10, 20, 6, 20,
NL80211_RRF_NO_IR |
NL80211_RRF_NO_OFDM),
/* IEEE 802.11a, channel 36..48 */
- REG_RULE(5180-10, 5240+10, 160, 6, 20,
- NL80211_RRF_NO_IR),
+ REG_RULE(5180-10, 5240+10, 80, 6, 20,
+ NL80211_RRF_NO_IR |
+ NL80211_RRF_AUTO_BW),
/* IEEE 802.11a, channel 52..64 - DFS required */
- REG_RULE(5260-10, 5320+10, 160, 6, 20,
+ REG_RULE(5260-10, 5320+10, 80, 6, 20,
NL80211_RRF_NO_IR |
+ NL80211_RRF_AUTO_BW |
NL80211_RRF_DFS),
/* IEEE 802.11a, channel 100..144 - DFS required */
@@ -2692,7 +2694,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
const struct ieee80211_power_rule *power_rule = NULL;
char bw[32], cac_time[32];
- pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
+ pr_debug(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
for (i = 0; i < rd->n_reg_rules; i++) {
reg_rule = &rd->reg_rules[i];
@@ -2719,7 +2721,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
* in certain regions
*/
if (power_rule->max_antenna_gain)
- pr_info(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
+ pr_debug(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
freq_range->start_freq_khz,
freq_range->end_freq_khz,
bw,
@@ -2727,7 +2729,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
power_rule->max_eirp,
cac_time);
else
- pr_info(" (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
+ pr_debug(" (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
freq_range->start_freq_khz,
freq_range->end_freq_khz,
bw,
@@ -2759,35 +2761,35 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
struct cfg80211_registered_device *rdev;
rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx);
if (rdev) {
- pr_info("Current regulatory domain updated by AP to: %c%c\n",
+ pr_debug("Current regulatory domain updated by AP to: %c%c\n",
rdev->country_ie_alpha2[0],
rdev->country_ie_alpha2[1]);
} else
- pr_info("Current regulatory domain intersected:\n");
+ pr_debug("Current regulatory domain intersected:\n");
} else
- pr_info("Current regulatory domain intersected:\n");
+ pr_debug("Current regulatory domain intersected:\n");
} else if (is_world_regdom(rd->alpha2)) {
- pr_info("World regulatory domain updated:\n");
+ pr_debug("World regulatory domain updated:\n");
} else {
if (is_unknown_alpha2(rd->alpha2))
- pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n");
+ pr_debug("Regulatory domain changed to driver built-in settings (unknown country)\n");
else {
if (reg_request_cell_base(lr))
- pr_info("Regulatory domain changed to country: %c%c by Cell Station\n",
+ pr_debug("Regulatory domain changed to country: %c%c by Cell Station\n",
rd->alpha2[0], rd->alpha2[1]);
else
- pr_info("Regulatory domain changed to country: %c%c\n",
+ pr_debug("Regulatory domain changed to country: %c%c\n",
rd->alpha2[0], rd->alpha2[1]);
}
}
- pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
+ pr_debug(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
print_rd_rules(rd);
}
static void print_regdomain_info(const struct ieee80211_regdomain *rd)
{
- pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
+ pr_debug("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
print_rd_rules(rd);
}
@@ -2808,7 +2810,8 @@ static int reg_set_rd_user(const struct ieee80211_regdomain *rd,
return -EALREADY;
if (!is_valid_rd(rd)) {
- pr_err("Invalid regulatory domain detected:\n");
+ pr_err("Invalid regulatory domain detected: %c%c\n",
+ rd->alpha2[0], rd->alpha2[1]);
print_regdomain_info(rd);
return -EINVAL;
}
@@ -2844,7 +2847,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
return -EALREADY;
if (!is_valid_rd(rd)) {
- pr_err("Invalid regulatory domain detected:\n");
+ pr_err("Invalid regulatory domain detected: %c%c\n",
+ rd->alpha2[0], rd->alpha2[1]);
print_regdomain_info(rd);
return -EINVAL;
}
@@ -2902,7 +2906,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
*/
if (!is_valid_rd(rd)) {
- pr_err("Invalid regulatory domain detected:\n");
+ pr_err("Invalid regulatory domain detected: %c%c\n",
+ rd->alpha2[0], rd->alpha2[1]);
print_regdomain_info(rd);
return -EINVAL;
}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index cc3676eb6239..ff4a91fcab9f 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -167,6 +167,8 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
{
struct sk_buff *segs;
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
+ BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET);
segs = skb_gso_segment(skb, 0);
kfree_skb(skb);
if (IS_ERR(segs))