summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/Kconfig32
-rw-r--r--net/8021q/Kconfig2
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/9p/mod.c1
-rw-r--r--net/9p/trans_fd.c39
-rw-r--r--net/9p/trans_xen.c61
-rw-r--r--net/Kconfig26
-rw-r--r--net/atm/Kconfig2
-rw-r--r--net/atm/lec.c4
-rw-r--r--net/atm/pppoatm.c2
-rw-r--r--net/ax25/af_ax25.c10
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/bluetooth/hci_event.c26
-rw-r--r--net/bpf/test_run.c19
-rw-r--r--net/bpfilter/bpfilter_kern.c42
-rw-r--r--net/bpfilter/bpfilter_umh_blob.S2
-rw-r--r--net/bridge/Kconfig6
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_mrp.c12
-rw-r--r--net/bridge/br_multicast.c2
-rw-r--r--net/bridge/br_private.h4
-rw-r--r--net/bridge/br_private_mrp.h2
-rw-r--r--net/bridge/netfilter/ebtables.c6
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c1
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c1
-rw-r--r--net/caif/Kconfig8
-rw-r--r--net/can/Kconfig8
-rw-r--r--net/ceph/ceph_common.c14
-rw-r--r--net/ceph/osd_client.c9
-rw-r--r--net/compat.c57
-rw-r--r--net/core/dev.c41
-rw-r--r--net/core/dev_addr_lists.c22
-rw-r--r--net/core/devlink.c25
-rw-r--r--net/core/drop_monitor.c1
-rw-r--r--net/core/filter.c29
-rw-r--r--net/core/flow_dissector.c32
-rw-r--r--net/core/flow_offload.c48
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/rtnetlink.c4
-rw-r--r--net/core/scm.c50
-rw-r--r--net/core/skmsg.c23
-rw-r--r--net/core/sock.c31
-rw-r--r--net/core/sock_map.c91
-rw-r--r--net/core/sock_reuseport.c1
-rw-r--r--net/core/sysctl_net_core.c2
-rw-r--r--net/core/xdp.c1
-rw-r--r--net/dcb/Kconfig2
-rw-r--r--net/dccp/Kconfig4
-rw-r--r--net/dccp/ccids/Kconfig6
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/dccp/proto.c7
-rw-r--r--net/decnet/Kconfig4
-rw-r--r--net/dsa/Kconfig2
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/tag_edsa.c37
-rw-r--r--net/ethtool/cabletest.c17
-rw-r--r--net/ethtool/common.c2
-rw-r--r--net/ethtool/ioctl.c2
-rw-r--r--net/ethtool/linkstate.c11
-rw-r--r--net/ethtool/netlink.c27
-rw-r--r--net/hsr/Kconfig2
-rw-r--r--net/hsr/hsr_device.c32
-rw-r--r--net/hsr/hsr_device.h2
-rw-r--r--net/hsr/hsr_forward.c18
-rw-r--r--net/hsr/hsr_framereg.c3
-rw-r--r--net/hsr/hsr_main.c9
-rw-r--r--net/hsr/hsr_netlink.c17
-rw-r--r--net/ieee802154/6lowpan/Kconfig2
-rw-r--r--net/ieee802154/Kconfig6
-rw-r--r--net/ipv4/Kconfig106
-rw-r--r--net/ipv4/bpfilter/sockopt.c20
-rw-r--r--net/ipv4/esp4_offload.c1
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_tunnel.c14
-rw-r--r--net/ipv4/ip_tunnel_core.c18
-rw-r--r--net/ipv4/ip_vti.c1
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/netfilter/Kconfig16
-rw-r--r--net/ipv4/netfilter/ip_tables.c15
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c1
-rw-r--r--net/ipv4/netfilter/iptable_filter.c10
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c10
-rw-r--r--net/ipv4/netfilter/iptable_nat.c10
-rw-r--r--net/ipv4/netfilter/iptable_raw.c10
-rw-r--r--net/ipv4/netfilter/iptable_security.c11
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c1
-rw-r--r--net/ipv4/nexthop.c82
-rw-r--r--net/ipv4/ping.c3
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c87
-rw-r--r--net/ipv4/tcp_bpf.c6
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_cubic.c5
-rw-r--r--net/ipv4/tcp_input.c39
-rw-r--r--net/ipv4/tcp_ipv4.c20
-rw-r--r--net/ipv4/tcp_output.c21
-rw-r--r--net/ipv4/udp.c17
-rw-r--r--net/ipv6/Kconfig78
-rw-r--r--net/ipv6/anycast.c17
-rw-r--r--net/ipv6/esp6.c13
-rw-r--r--net/ipv6/esp6_offload.c1
-rw-r--r--net/ipv6/fou6.c1
-rw-r--r--net/ipv6/icmp.c4
-rw-r--r--net/ipv6/ila/ila_main.c1
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c20
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/ip6_vti.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c1
-rw-r--r--net/ipv6/netfilter/Kconfig6
-rw-r--r--net/ipv6/netfilter/ip6_tables.c15
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c1
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c10
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c10
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c10
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c10
-rw-r--r--net/ipv6/netfilter/ip6table_security.c10
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c1
-rw-r--r--net/ipv6/route.c15
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/ipv6/udp.c17
-rw-r--r--net/iucv/iucv.c2
-rw-r--r--net/kcm/Kconfig2
-rw-r--r--net/key/af_key.c11
-rw-r--r--net/l2tp/Kconfig2
-rw-r--r--net/l2tp/l2tp_core.c5
-rw-r--r--net/l3mdev/Kconfig2
-rw-r--r--net/lapb/Kconfig2
-rw-r--r--net/llc/af_llc.c10
-rw-r--r--net/mac80211/Kconfig52
-rw-r--r--net/mac80211/cfg.c1
-rw-r--r--net/mac80211/mesh.c13
-rw-r--r--net/mac80211/mesh_hwmp.c7
-rw-r--r--net/mac80211/mesh_pathtbl.c1
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/rx.c28
-rw-r--r--net/mac80211/sta_info.c4
-rw-r--r--net/mac80211/status.c22
-rw-r--r--net/mac80211/tx.c15
-rw-r--r--net/mac80211/util.c4
-rw-r--r--net/mac802154/Kconfig2
-rw-r--r--net/mpls/Kconfig6
-rw-r--r--net/mptcp/crypto.c15
-rw-r--r--net/mptcp/options.c10
-rw-r--r--net/mptcp/protocol.c47
-rw-r--r--net/mptcp/protocol.h8
-rw-r--r--net/mptcp/subflow.c62
-rw-r--r--net/ncsi/Kconfig4
-rw-r--r--net/netfilter/Kconfig58
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h4
-rw-r--r--net/netfilter/ipvs/Kconfig54
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c12
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c32
-rw-r--r--net/netfilter/nf_dup_netdev.c1
-rw-r--r--net/netfilter/nf_flow_table_core.c46
-rw-r--r--net/netfilter/nf_flow_table_inet.c1
-rw-r--r--net/netfilter/nf_flow_table_offload.c1
-rw-r--r--net/netfilter/nf_synproxy_core.c1
-rw-r--r--net/netfilter/nf_tables_api.c184
-rw-r--r--net/netfilter/nf_tables_offload.c1
-rw-r--r--net/netfilter/nfnetlink.c1
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c4
-rw-r--r--net/netfilter/nft_compat.c1
-rw-r--r--net/netfilter/nft_connlimit.c1
-rw-r--r--net/netfilter/nft_counter.c1
-rw-r--r--net/netfilter/nft_ct.c1
-rw-r--r--net/netfilter/nft_dup_netdev.c1
-rw-r--r--net/netfilter/nft_fib_inet.c1
-rw-r--r--net/netfilter/nft_fib_netdev.c1
-rw-r--r--net/netfilter/nft_flow_offload.c1
-rw-r--r--net/netfilter/nft_hash.c1
-rw-r--r--net/netfilter/nft_limit.c1
-rw-r--r--net/netfilter/nft_log.c1
-rw-r--r--net/netfilter/nft_masq.c1
-rw-r--r--net/netfilter/nft_nat.c1
-rw-r--r--net/netfilter/nft_numgen.c1
-rw-r--r--net/netfilter/nft_objref.c1
-rw-r--r--net/netfilter/nft_osf.c1
-rw-r--r--net/netfilter/nft_queue.c1
-rw-r--r--net/netfilter/nft_quota.c1
-rw-r--r--net/netfilter/nft_redir.c1
-rw-r--r--net/netfilter/nft_reject.c1
-rw-r--r--net/netfilter/nft_reject_inet.c1
-rw-r--r--net/netfilter/nft_set_pipapo.c6
-rw-r--r--net/netfilter/nft_set_rbtree.c21
-rw-r--r--net/netfilter/nft_synproxy.c1
-rw-r--r--net/netfilter/nft_tunnel.c1
-rw-r--r--net/netfilter/x_tables.c5
-rw-r--r--net/netfilter/xt_nat.c1
-rw-r--r--net/netlabel/Kconfig2
-rw-r--r--net/netlink/Kconfig2
-rw-r--r--net/netlink/genetlink.c107
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/hci/Kconfig2
-rw-r--r--net/nfc/nci/core.c5
-rw-r--r--net/nsh/Kconfig2
-rw-r--r--net/openvswitch/Kconfig8
-rw-r--r--net/openvswitch/actions.c9
-rw-r--r--net/packet/Kconfig4
-rw-r--r--net/qrtr/Kconfig6
-rw-r--r--net/qrtr/qrtr.c11
-rw-r--r--net/rds/Kconfig6
-rw-r--r--net/rds/connection.c11
-rw-r--r--net/rds/ib.h8
-rw-r--r--net/rds/rds.h7
-rw-r--r--net/rds/recv.c3
-rw-r--r--net/rds/send.c3
-rw-r--r--net/rds/transport.c26
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/ar-internal.h119
-rw-r--r--net/rxrpc/call_accept.c7
-rw-r--r--net/rxrpc/call_event.c30
-rw-r--r--net/rxrpc/call_object.c27
-rw-r--r--net/rxrpc/conn_event.c7
-rw-r--r--net/rxrpc/conn_object.c8
-rw-r--r--net/rxrpc/input.c14
-rw-r--r--net/rxrpc/peer_event.c4
-rw-r--r--net/rxrpc/recvmsg.c83
-rw-r--r--net/rxrpc/sendmsg.c9
-rw-r--r--net/sched/Kconfig122
-rw-r--r--net/sched/act_connmark.c9
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/act_ct.c42
-rw-r--r--net/sched/act_ctinfo.c9
-rw-r--r--net/sched/act_gate.c126
-rw-r--r--net/sched/act_mpls.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/cls_api.c28
-rw-r--r--net/sched/cls_flow.c10
-rw-r--r--net/sched/cls_flower.c2
-rw-r--r--net/sched/em_ipset.c2
-rw-r--r--net/sched/em_ipt.c2
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_atm.c8
-rw-r--r--net/sched/sch_cake.c64
-rw-r--r--net/sched/sch_cbq.c2
-rw-r--r--net/sched/sch_dsmark.c6
-rw-r--r--net/sched/sch_fq.c1
-rw-r--r--net/sched/sch_fq_codel.c3
-rw-r--r--net/sched/sch_fq_pie.c2
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_hhf.c1
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sched/sch_teql.c2
-rw-r--r--net/sctp/Kconfig2
-rw-r--r--net/sctp/associola.c5
-rw-r--r--net/sctp/bind_addr.c1
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/sctp/stream.c27
-rw-r--r--net/smc/Kconfig4
-rw-r--r--net/smc/af_smc.c12
-rw-r--r--net/smc/smc_cdc.c6
-rw-r--r--net/smc/smc_clc.c45
-rw-r--r--net/smc/smc_clc.h2
-rw-r--r--net/smc/smc_core.c136
-rw-r--r--net/smc/smc_core.h9
-rw-r--r--net/smc/smc_ib.c27
-rw-r--r--net/smc/smc_ib.h4
-rw-r--r--net/smc/smc_ism.c11
-rw-r--r--net/smc/smc_ism.h3
-rw-r--r--net/smc/smc_llc.c212
-rw-r--r--net/smc/smc_llc.h2
-rw-r--r--net/smc/smc_pnet.c37
-rw-r--r--net/smc/smc_wr.c10
-rw-r--r--net/sunrpc/rpc_pipe.c1
-rw-r--r--net/sunrpc/svcsock.c5
-rw-r--r--net/sunrpc/xdr.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c8
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c13
-rw-r--r--net/sunrpc/xprtrdma/transport.c7
-rw-r--r--net/sunrpc/xprtrdma/verbs.c106
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h3
-rw-r--r--net/sunrpc/xprtsock.c10
-rw-r--r--net/switchdev/Kconfig2
-rw-r--r--net/tipc/Kconfig4
-rw-r--r--net/tipc/bearer.c2
-rw-r--r--net/tipc/link.c28
-rw-r--r--net/tipc/msg.c4
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/tls/Kconfig2
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/unix/Kconfig4
-rw-r--r--net/vmw_vsock/virtio_transport.c2
-rw-r--r--net/wireless/Kconfig14
-rw-r--r--net/wireless/core.c6
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/mlme.c26
-rw-r--r--net/wireless/nl80211.c11
-rw-r--r--net/x25/Kconfig2
-rw-r--r--net/xdp/xsk.c4
-rw-r--r--net/xdp/xsk_buff_pool.c54
-rw-r--r--net/xfrm/Kconfig38
-rw-r--r--net/xfrm/espintcp.c62
-rw-r--r--net/xfrm/xfrm_device.c4
-rw-r--r--net/xfrm/xfrm_interface.c2
-rw-r--r--net/xfrm/xfrm_output.c4
-rw-r--r--net/xfrm/xfrm_policy.c43
-rw-r--r--net/xfrm/xfrm_user.c18
320 files changed, 2800 insertions, 1900 deletions
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig
index 4c1f4c0aa58a..d8fc459492b0 100644
--- a/net/6lowpan/Kconfig
+++ b/net/6lowpan/Kconfig
@@ -2,7 +2,7 @@
menuconfig 6LOWPAN
tristate "6LoWPAN Support"
depends on IPV6
- ---help---
+ help
This enables IPv6 over Low power Wireless Personal Area Network -
"6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks.
@@ -10,7 +10,7 @@ config 6LOWPAN_DEBUGFS
bool "6LoWPAN debugfs support"
depends on 6LOWPAN
depends on DEBUG_FS
- ---help---
+ help
This enables 6LoWPAN debugfs support. For example to manipulate
IPHC context information at runtime.
@@ -18,7 +18,7 @@ menuconfig 6LOWPAN_NHC
tristate "Next Header and Generic Header Compression Support"
depends on 6LOWPAN
default y
- ---help---
+ help
Support for next header and generic header compression defined in
RFC6282 and RFC7400.
@@ -27,78 +27,78 @@ if 6LOWPAN_NHC
config 6LOWPAN_NHC_DEST
tristate "Destination Options Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Destination Options Header compression according to
RFC6282.
config 6LOWPAN_NHC_FRAGMENT
tristate "Fragment Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Fragment Header compression according to RFC6282.
config 6LOWPAN_NHC_HOP
tristate "Hop-by-Hop Options Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Hop-by-Hop Options Header compression according to
RFC6282.
config 6LOWPAN_NHC_IPV6
tristate "IPv6 Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Header compression according to RFC6282.
config 6LOWPAN_NHC_MOBILITY
tristate "Mobility Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Mobility Header compression according to RFC6282.
config 6LOWPAN_NHC_ROUTING
tristate "Routing Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Routing Header compression according to RFC6282.
config 6LOWPAN_NHC_UDP
tristate "UDP Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 UDP Header compression according to RFC6282.
config 6LOWPAN_GHC_EXT_HDR_HOP
tristate "GHC Hop-by-Hop Options Header Support"
- ---help---
+ help
6LoWPAN IPv6 Hop-by-Hop option generic header compression according
to RFC7400.
config 6LOWPAN_GHC_UDP
tristate "GHC UDP Support"
- ---help---
+ help
6LoWPAN IPv6 UDP generic header compression according to RFC7400.
config 6LOWPAN_GHC_ICMPV6
tristate "GHC ICMPv6 Support"
- ---help---
+ help
6LoWPAN IPv6 ICMPv6 generic header compression according to RFC7400.
config 6LOWPAN_GHC_EXT_HDR_DEST
tristate "GHC Destination Options Header Support"
- ---help---
+ help
6LoWPAN IPv6 destination option generic header compression according
to RFC7400.
config 6LOWPAN_GHC_EXT_HDR_FRAG
tristate "GHC Fragmentation Options Header Support"
- ---help---
+ help
6LoWPAN IPv6 fragmentation option generic header compression
according to RFC7400.
config 6LOWPAN_GHC_EXT_HDR_ROUTE
tristate "GHC Routing Options Header Support"
- ---help---
+ help
6LoWPAN IPv6 routing option generic header compression according
to RFC7400.
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index 5510b4b90ff0..8bf7a1765b78 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -5,7 +5,7 @@
config VLAN_8021Q
tristate "802.1Q/802.1ad VLAN Support"
- ---help---
+ help
Select this and you will be able to create 802.1Q VLAN interfaces
on your Ethernet interfaces. 802.1Q VLAN supports almost
everything a regular Ethernet interface does, including
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f00bb57f0f60..3dd7c972677b 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -494,6 +494,7 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
* separate class since they always nest.
*/
static struct lock_class_key vlan_netdev_xmit_lock_key;
+static struct lock_class_key vlan_netdev_addr_lock_key;
static void vlan_dev_set_lockdep_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -504,6 +505,8 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev,
static void vlan_dev_set_lockdep_class(struct net_device *dev)
{
+ lockdep_set_class(&dev->addr_list_lock,
+ &vlan_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
}
diff --git a/net/9p/mod.c b/net/9p/mod.c
index c1b62428da7b..5126566850bd 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -189,3 +189,4 @@ MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Plan 9 Resource Sharing Support (9P2000)");
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 13cd683a658a..12ecacf0c55f 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -362,6 +362,10 @@ static void p9_read_work(struct work_struct *work)
if (m->rreq->status == REQ_STATUS_SENT) {
list_del(&m->rreq->req_list);
p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
+ } else if (m->rreq->status == REQ_STATUS_FLSHD) {
+ /* Ignore replies associated with a cancelled request. */
+ p9_debug(P9_DEBUG_TRANS,
+ "Ignore replies associated with a cancelled request\n");
} else {
spin_unlock(&m->client->lock);
p9_debug(P9_DEBUG_ERROR,
@@ -703,11 +707,20 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
{
p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+ spin_lock(&client->lock);
+ /* Ignore cancelled request if message has been received
+ * before lock.
+ */
+ if (req->status == REQ_STATUS_RCVD) {
+ spin_unlock(&client->lock);
+ return 0;
+ }
+
/* we haven't received a response for oldreq,
* remove it from the list.
*/
- spin_lock(&client->lock);
list_del(&req->req_list);
+ req->status = REQ_STATUS_FLSHD;
spin_unlock(&client->lock);
p9_req_put(req);
@@ -803,20 +816,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
return -ENOMEM;
ts->rd = fget(rfd);
+ if (!ts->rd)
+ goto out_free_ts;
+ if (!(ts->rd->f_mode & FMODE_READ))
+ goto out_put_rd;
ts->wr = fget(wfd);
- if (!ts->rd || !ts->wr) {
- if (ts->rd)
- fput(ts->rd);
- if (ts->wr)
- fput(ts->wr);
- kfree(ts);
- return -EIO;
- }
+ if (!ts->wr)
+ goto out_put_rd;
+ if (!(ts->wr->f_mode & FMODE_WRITE))
+ goto out_put_wr;
client->trans = ts;
client->status = Connected;
return 0;
+
+out_put_wr:
+ fput(ts->wr);
+out_put_rd:
+ fput(ts->rd);
+out_free_ts:
+ kfree(ts);
+ return -EIO;
}
static int p9_socket_open(struct p9_client *client, struct socket *csocket)
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 3963eb11c3fb..3debad93be1a 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -43,8 +43,8 @@
#include <net/9p/transport.h>
#define XEN_9PFS_NUM_RINGS 2
-#define XEN_9PFS_RING_ORDER 6
-#define XEN_9PFS_RING_SIZE XEN_FLEX_RING_SIZE(XEN_9PFS_RING_ORDER)
+#define XEN_9PFS_RING_ORDER 9
+#define XEN_9PFS_RING_SIZE(ring) XEN_FLEX_RING_SIZE(ring->intf->ring_order)
struct xen_9pfs_header {
uint32_t size;
@@ -132,8 +132,8 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size)
prod = ring->intf->out_prod;
virt_mb();
- return XEN_9PFS_RING_SIZE -
- xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) >= size;
+ return XEN_9PFS_RING_SIZE(ring) -
+ xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) >= size;
}
static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
@@ -167,17 +167,18 @@ again:
prod = ring->intf->out_prod;
virt_mb();
- if (XEN_9PFS_RING_SIZE - xen_9pfs_queued(prod, cons,
- XEN_9PFS_RING_SIZE) < size) {
+ if (XEN_9PFS_RING_SIZE(ring) -
+ xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) < size) {
spin_unlock_irqrestore(&ring->lock, flags);
goto again;
}
- masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
- masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+ masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE(ring));
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring));
xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
- &masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
+ &masked_prod, masked_cons,
+ XEN_9PFS_RING_SIZE(ring));
p9_req->status = REQ_STATUS_SENT;
virt_wmb(); /* write ring before updating pointer */
@@ -207,19 +208,19 @@ static void p9_xen_response(struct work_struct *work)
prod = ring->intf->in_prod;
virt_rmb();
- if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) <
+ if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) <
sizeof(h)) {
notify_remote_via_irq(ring->irq);
return;
}
- masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
- masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+ masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE(ring));
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring));
/* First, read just the header */
xen_9pfs_read_packet(&h, ring->data.in, sizeof(h),
masked_prod, &masked_cons,
- XEN_9PFS_RING_SIZE);
+ XEN_9PFS_RING_SIZE(ring));
req = p9_tag_lookup(priv->client, h.tag);
if (!req || req->status != REQ_STATUS_SENT) {
@@ -233,11 +234,11 @@ static void p9_xen_response(struct work_struct *work)
memcpy(&req->rc, &h, sizeof(h));
req->rc.offset = 0;
- masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring));
/* Then, read the whole packet (including the header) */
xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
masked_prod, &masked_cons,
- XEN_9PFS_RING_SIZE);
+ XEN_9PFS_RING_SIZE(ring));
virt_mb();
cons += h.size;
@@ -267,7 +268,7 @@ static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r)
static struct p9_trans_module p9_xen_trans = {
.name = "xen",
- .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT),
+ .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT - 2),
.def = 1,
.create = p9_xen_create,
.close = p9_xen_close,
@@ -295,14 +296,16 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
if (priv->rings[i].irq > 0)
unbind_from_irqhandler(priv->rings[i].irq, priv->dev);
if (priv->rings[i].data.in) {
- for (j = 0; j < (1 << XEN_9PFS_RING_ORDER); j++) {
+ for (j = 0;
+ j < (1 << priv->rings[i].intf->ring_order);
+ j++) {
grant_ref_t ref;
ref = priv->rings[i].intf->ref[j];
gnttab_end_foreign_access(ref, 0, 0);
}
free_pages((unsigned long)priv->rings[i].data.in,
- XEN_9PFS_RING_ORDER -
+ priv->rings[i].intf->ring_order -
(PAGE_SHIFT - XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
@@ -323,7 +326,8 @@ static int xen_9pfs_front_remove(struct xenbus_device *dev)
}
static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
- struct xen_9pfs_dataring *ring)
+ struct xen_9pfs_dataring *ring,
+ unsigned int order)
{
int i = 0;
int ret = -ENOMEM;
@@ -342,21 +346,21 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
goto out;
ring->ref = ret;
bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ order - (PAGE_SHIFT - XEN_PAGE_SHIFT));
if (!bytes) {
ret = -ENOMEM;
goto out;
}
- for (; i < (1 << XEN_9PFS_RING_ORDER); i++) {
+ for (; i < (1 << order); i++) {
ret = gnttab_grant_foreign_access(
dev->otherend_id, virt_to_gfn(bytes) + i, 0);
if (ret < 0)
goto out;
ring->intf->ref[i] = ret;
}
- ring->intf->ring_order = XEN_9PFS_RING_ORDER;
+ ring->intf->ring_order = order;
ring->data.in = bytes;
- ring->data.out = bytes + XEN_9PFS_RING_SIZE;
+ ring->data.out = bytes + XEN_FLEX_RING_SIZE(order);
ret = xenbus_alloc_evtchn(dev, &ring->evtchn);
if (ret)
@@ -374,7 +378,7 @@ out:
for (i--; i >= 0; i--)
gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
free_pages((unsigned long)bytes,
- XEN_9PFS_RING_ORDER -
+ ring->intf->ring_order -
(PAGE_SHIFT - XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(ring->ref, 0, 0);
@@ -404,8 +408,10 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
return -EINVAL;
max_ring_order = xenbus_read_unsigned(dev->otherend,
"max-ring-page-order", 0);
- if (max_ring_order < XEN_9PFS_RING_ORDER)
- return -EINVAL;
+ if (max_ring_order > XEN_9PFS_RING_ORDER)
+ max_ring_order = XEN_9PFS_RING_ORDER;
+ if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order))
+ p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -422,7 +428,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
for (i = 0; i < priv->num_rings; i++) {
priv->rings[i].priv = priv;
- ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i]);
+ ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i],
+ max_ring_order);
if (ret < 0)
goto error;
}
diff --git a/net/Kconfig b/net/Kconfig
index 5c524c6ee75d..d1672280d6a4 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -8,7 +8,7 @@ menuconfig NET
select NLATTR
select GENERIC_NET_UTILS
select BPF
- ---help---
+ help
Unless you really know what you are doing, you should say Y here.
The reason is that some programs need kernel networking support even
when running on a stand-alone machine that isn't connected to any
@@ -70,7 +70,7 @@ source "net/xdp/Kconfig"
config INET
bool "TCP/IP networking"
- ---help---
+ help
These are the protocols used on the Internet and on most local
Ethernets. It is highly recommended to say Y here (this will enlarge
your kernel by about 400 KB), since some programs (e.g. the X window
@@ -121,7 +121,7 @@ config NETWORK_PHY_TIMESTAMPING
menuconfig NETFILTER
bool "Network packet filtering framework (Netfilter)"
- ---help---
+ help
Netfilter is a framework for filtering and mangling network packets
that pass through your Linux box.
@@ -192,7 +192,7 @@ config BRIDGE_NETFILTER
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
select SKB_EXTENSIONS
- ---help---
+ help
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
want this option enabled.
@@ -268,7 +268,7 @@ config CGROUP_NET_PRIO
bool "Network priority cgroup"
depends on CGROUPS
select SOCK_CGROUP_DATA
- ---help---
+ help
Cgroup subsystem for use in assigning processes to network priorities on
a per-interface basis.
@@ -276,7 +276,7 @@ config CGROUP_NET_CLASSID
bool "Network classid cgroup"
depends on CGROUPS
select SOCK_CGROUP_DATA
- ---help---
+ help
Cgroup subsystem for use as general purpose socket classid marker that is
being used in cls_cgroup and for netfilter matching.
@@ -294,7 +294,7 @@ config BPF_JIT
bool "enable BPF Just In Time compiler"
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
depends on MODULES
- ---help---
+ help
Berkeley Packet Filter filtering capabilities are normally handled
by an interpreter. This option allows kernel to generate a native
code when filter is loaded in memory. This should speedup
@@ -312,7 +312,7 @@ config BPF_STREAM_PARSER
depends on CGROUP_BPF
select STREAM_PARSER
select NET_SOCK_MSG
- ---help---
+ help
Enabling this allows a stream parser to be used with
BPF_MAP_TYPE_SOCKMAP.
@@ -324,7 +324,7 @@ config NET_FLOW_LIMIT
bool
depends on RPS
default y
- ---help---
+ help
The network stack has to drop packets when a receive processing CPU's
backlog reaches netdev_max_backlog. If a few out of many active flows
generate the vast majority of load, drop their traffic earlier to
@@ -337,7 +337,7 @@ menu "Network testing"
config NET_PKTGEN
tristate "Packet Generator (USE WITH CAUTION)"
depends on INET && PROC_FS
- ---help---
+ help
This module will inject preconfigured packets, at a configurable
rate, out of a given interface. It is used for network interface
stress testing and performance analysis. If you don't understand
@@ -352,7 +352,7 @@ config NET_PKTGEN
config NET_DROP_MONITOR
tristate "Network packet drop alerting service"
depends on INET && TRACEPOINTS
- ---help---
+ help
This feature provides an alerting service to userspace in the
event that packets are discarded in the network stack. Alerts
are broadcast via netlink socket to any listening user space
@@ -398,7 +398,7 @@ source "net/ife/Kconfig"
config LWTUNNEL
bool "Network light weight tunnels"
- ---help---
+ help
This feature provides an infrastructure to support light weight
tunnels like mpls. There is no netdevice associated with a light
weight tunnel endpoint. Tunnel encapsulation parameters are stored
@@ -408,7 +408,7 @@ config LWTUNNEL_BPF
bool "Execute BPF program as route nexthop action"
depends on LWTUNNEL && INET
default y if LWTUNNEL=y
- ---help---
+ help
Allows to run BPF programs as a nexthop action following a route
lookup for incoming and outgoing packets.
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
index e61dcc9f85b2..77343d57ff2a 100644
--- a/net/atm/Kconfig
+++ b/net/atm/Kconfig
@@ -5,7 +5,7 @@
config ATM
tristate "Asynchronous Transfer Mode (ATM)"
- ---help---
+ help
ATM is a high-speed networking technology for Local Area Networks
and Wide Area Networks. It uses a fixed packet size and is
connection oriented, allowing for the negotiation of minimum
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ca37f5a71f5e..875fc0bc1780 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1536,10 +1536,8 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
struct lec_arp_table *to_return;
to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
- if (!to_return) {
- pr_info("LEC: Arp entry kmalloc failed\n");
+ if (!to_return)
return NULL;
- }
ether_addr_copy(to_return->mac_addr, mac_addr);
INIT_HLIST_NODE(&to_return->next);
timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 45d8e1d5d033..579b66da1d95 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -393,7 +393,7 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
* Each PPPoATM instance has its own tasklet - this is just a
* prototypical one used to initialize them
*/
- static const DECLARE_TASKLET(tasklet_proto, pppoatm_wakeup_sender, 0);
+ static const DECLARE_TASKLET_OLD(tasklet_proto, pppoatm_wakeup_sender);
if (copy_from_user(&be, arg, sizeof be))
return -EFAULT;
if (be.encaps != PPPOATM_ENCAPS_AUTODETECT &&
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index fd91cd34f25e..dec3f35467c9 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1187,7 +1187,10 @@ static int __must_check ax25_connect(struct socket *sock,
if (addr_len > sizeof(struct sockaddr_ax25) &&
fsa->fsa_ax25.sax25_ndigis != 0) {
/* Valid number of digipeaters ? */
- if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
+ if (fsa->fsa_ax25.sax25_ndigis < 1 ||
+ fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS ||
+ addr_len < sizeof(struct sockaddr_ax25) +
+ sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) {
err = -EINVAL;
goto out_release;
}
@@ -1507,7 +1510,10 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax;
/* Valid number of digipeaters ? */
- if (usax->sax25_ndigis < 1 || usax->sax25_ndigis > AX25_MAX_DIGIS) {
+ if (usax->sax25_ndigis < 1 ||
+ usax->sax25_ndigis > AX25_MAX_DIGIS ||
+ addr_len < sizeof(struct sockaddr_ax25) +
+ sizeof(ax25_address) * usax->sax25_ndigis) {
err = -EINVAL;
goto out;
}
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0ddd80130ea3..f1f1c86f3419 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -745,6 +745,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
* separate class since they always nest.
*/
static struct lock_class_key batadv_netdev_xmit_lock_key;
+static struct lock_class_key batadv_netdev_addr_lock_key;
/**
* batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
@@ -765,6 +766,7 @@ static void batadv_set_lockdep_class_one(struct net_device *dev,
*/
static void batadv_set_lockdep_class(struct net_device *dev)
{
+ lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cfeaee347db3..af9d7f2ff8ba 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1338,6 +1338,9 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
{
struct discovery_state *d = &hdev->discovery;
+ if (len > HCI_MAX_AD_LENGTH)
+ return;
+
bacpy(&d->last_adv_addr, bdaddr);
d->last_adv_addr_type = bdaddr_type;
d->last_adv_rssi = rssi;
@@ -5355,7 +5358,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
u8 bdaddr_type, bdaddr_t *direct_addr,
- u8 direct_addr_type, s8 rssi, u8 *data, u8 len)
+ u8 direct_addr_type, s8 rssi, u8 *data, u8 len,
+ bool ext_adv)
{
struct discovery_state *d = &hdev->discovery;
struct smp_irk *irk;
@@ -5377,6 +5381,11 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
return;
}
+ if (!ext_adv && len > HCI_MAX_AD_LENGTH) {
+ bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes");
+ return;
+ }
+
/* Find the end of the data in case the report contains padded zero
* bytes at the end causing an invalid length value.
*
@@ -5437,7 +5446,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
direct_addr);
- if (conn && type == LE_ADV_IND) {
+ if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) {
/* Store report for later inclusion by
* mgmt_device_connected
*/
@@ -5491,7 +5500,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
* event or send an immediate device found event if the data
* should not be stored for later.
*/
- if (!has_pending_adv_report(hdev)) {
+ if (!ext_adv && !has_pending_adv_report(hdev)) {
/* If the report will trigger a SCAN_REQ store it for
* later merging.
*/
@@ -5526,7 +5535,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
/* If the new report will trigger a SCAN_REQ store it for
* later merging.
*/
- if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+ if (!ext_adv && (type == LE_ADV_IND ||
+ type == LE_ADV_SCAN_IND)) {
store_pending_adv_report(hdev, bdaddr, bdaddr_type,
rssi, flags, data, len);
return;
@@ -5566,7 +5576,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
rssi = ev->data[ev->length];
process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
ev->bdaddr_type, NULL, 0, rssi,
- ev->data, ev->length);
+ ev->data, ev->length, false);
} else {
bt_dev_err(hdev, "Dropping invalid advertising data");
}
@@ -5638,7 +5648,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &ev->bdaddr,
ev->bdaddr_type, NULL, 0, ev->rssi,
- ev->data, ev->length);
+ ev->data, ev->length,
+ !(evt_type & LE_EXT_ADV_LEGACY_PDU));
}
ptr += sizeof(*ev) + ev->length;
@@ -5836,7 +5847,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev,
process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
ev->bdaddr_type, &ev->direct_addr,
- ev->direct_addr_type, ev->rssi, NULL, 0);
+ ev->direct_addr_type, ev->rssi, NULL, 0,
+ false);
ptr += sizeof(*ev);
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index bfd4ccd80847..b03c469cd01f 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -147,6 +147,20 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
return a + (long)b + c + d + (long)e + f;
}
+struct bpf_fentry_test_t {
+ struct bpf_fentry_test_t *a;
+};
+
+int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
+{
+ return (long)arg;
+}
+
+int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
+{
+ return (long)arg->a;
+}
+
int noinline bpf_modify_return_test(int a, int *b)
{
*b += 1;
@@ -185,6 +199,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
+ struct bpf_fentry_test_t arg = {};
u16 side_effect = 0, ret = 0;
int b = 2, err = -EFAULT;
u32 retval = 0;
@@ -197,7 +212,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
bpf_fentry_test3(4, 5, 6) != 15 ||
bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
- bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111)
+ bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
+ bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
+ bpf_fentry_test8(&arg) != 0)
goto out;
break;
case BPF_MODIFY_RETURN:
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index c0f0990f30b6..c3146b2700a0 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -15,15 +15,13 @@ extern char bpfilter_umh_end;
static void shutdown_umh(void)
{
- struct task_struct *tsk;
+ struct umd_info *info = &bpfilter_ops.info;
+ struct pid *tgid = info->tgid;
- if (bpfilter_ops.stop)
- return;
-
- tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID);
- if (tsk) {
- send_sig(SIGKILL, tsk, 1);
- put_task_struct(tsk);
+ if (tgid) {
+ kill_pid(tgid, SIGKILL, 1);
+ wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
+ bpfilter_umh_cleanup(info);
}
}
@@ -39,7 +37,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
{
struct mbox_request req;
struct mbox_reply reply;
- loff_t pos;
+ loff_t pos = 0;
ssize_t n;
int ret = -EFAULT;
@@ -48,9 +46,9 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
req.cmd = optname;
req.addr = (long __force __user)optval;
req.len = optlen;
- if (!bpfilter_ops.info.pid)
+ if (!bpfilter_ops.info.tgid)
goto out;
- n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req),
+ n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req),
&pos);
if (n != sizeof(req)) {
pr_err("write fail %zd\n", n);
@@ -77,13 +75,10 @@ static int start_umh(void)
int err;
/* fork usermode process */
- err = fork_usermode_blob(&bpfilter_umh_start,
- &bpfilter_umh_end - &bpfilter_umh_start,
- &bpfilter_ops.info);
+ err = fork_usermode_driver(&bpfilter_ops.info);
if (err)
return err;
- bpfilter_ops.stop = false;
- pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid);
+ pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid));
/* health check that usermode process started correctly */
if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {
@@ -98,18 +93,21 @@ static int __init load_umh(void)
{
int err;
+ err = umd_load_blob(&bpfilter_ops.info,
+ &bpfilter_umh_start,
+ &bpfilter_umh_end - &bpfilter_umh_start);
+ if (err)
+ return err;
+
mutex_lock(&bpfilter_ops.lock);
- if (!bpfilter_ops.stop) {
- err = -EFAULT;
- goto out;
- }
err = start_umh();
if (!err && IS_ENABLED(CONFIG_INET)) {
bpfilter_ops.sockopt = &__bpfilter_process_sockopt;
bpfilter_ops.start = &start_umh;
}
-out:
mutex_unlock(&bpfilter_ops.lock);
+ if (err)
+ umd_unload_blob(&bpfilter_ops.info);
return err;
}
@@ -122,6 +120,8 @@ static void __exit fini_umh(void)
bpfilter_ops.sockopt = NULL;
}
mutex_unlock(&bpfilter_ops.lock);
+
+ umd_unload_blob(&bpfilter_ops.info);
}
module_init(load_umh);
module_exit(fini_umh);
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
index 9ea6100dca87..40311d10d2f2 100644
--- a/net/bpfilter/bpfilter_umh_blob.S
+++ b/net/bpfilter/bpfilter_umh_blob.S
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
- .section .rodata, "a"
+ .section .init.rodata, "a"
.global bpfilter_umh_start
bpfilter_umh_start:
.incbin "net/bpfilter/bpfilter_umh"
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 51a6414145d2..80879196560c 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -8,7 +8,7 @@ config BRIDGE
select LLC
select STP
depends on IPV6 || IPV6=n
- ---help---
+ help
If you say Y here, then your Linux box will be able to act as an
Ethernet bridge, which means that the different Ethernet segments it
is connected to will appear as one Ethernet to the participants.
@@ -39,7 +39,7 @@ config BRIDGE_IGMP_SNOOPING
depends on BRIDGE
depends on INET
default y
- ---help---
+ help
If you say Y here, then the Ethernet bridge will be able selectively
forward multicast traffic based on IGMP/MLD traffic received from
each port.
@@ -53,7 +53,7 @@ config BRIDGE_VLAN_FILTERING
depends on BRIDGE
depends on VLAN_8021Q
default n
- ---help---
+ help
If you say Y here, then the Ethernet bridge will be able selectively
receive and forward traffic based on VLAN information in the packet
any VLAN information configured on the bridge port or bridge device.
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8ec1362588af..8c7b78f8bc23 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -105,6 +105,13 @@ out:
return NETDEV_TX_OK;
}
+static struct lock_class_key bridge_netdev_addr_lock_key;
+
+static void br_set_lockdep_class(struct net_device *dev)
+{
+ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
+}
+
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -143,6 +150,7 @@ static int br_dev_init(struct net_device *dev)
br_fdb_hash_fini(br);
}
+ br_set_lockdep_class(dev);
return err;
}
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index 24986ec7d38c..90592af9db61 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -86,7 +86,7 @@ static struct sk_buff *br_mrp_skb_alloc(struct net_bridge_port *p,
{
struct ethhdr *eth_hdr;
struct sk_buff *skb;
- u16 *version;
+ __be16 *version;
skb = dev_alloc_skb(MRP_MAX_FRAME_LENGTH);
if (!skb)
@@ -411,10 +411,16 @@ int br_mrp_set_port_role(struct net_bridge_port *p,
if (!mrp)
return -EINVAL;
- if (role == BR_MRP_PORT_ROLE_PRIMARY)
+ switch (role) {
+ case BR_MRP_PORT_ROLE_PRIMARY:
rcu_assign_pointer(mrp->p_port, p);
- else
+ break;
+ case BR_MRP_PORT_ROLE_SECONDARY:
rcu_assign_pointer(mrp->s_port, p);
+ break;
+ default:
+ return -EINVAL;
+ }
br_mrp_port_switchdev_set_role(p, role);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 83490bf73a13..4c4a93abde68 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1007,7 +1007,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs);
if (skb_transport_offset(skb) + ipv6_transport_len(skb) <
- nsrcs_offset + sizeof(_nsrcs))
+ nsrcs_offset + sizeof(__nsrcs))
return -EINVAL;
_nsrcs = skb_header_pointer(skb, nsrcs_offset,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7501be4eeba0..e0ea6dbbc97e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -217,8 +217,8 @@ struct net_bridge_port_group {
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
+ unsigned char eth_addr[ETH_ALEN] __aligned(2);
unsigned char flags;
- unsigned char eth_addr[ETH_ALEN];
};
struct net_bridge_mdb_entry {
@@ -430,7 +430,7 @@ struct net_bridge {
struct hlist_head fdb_list;
#if IS_ENABLED(CONFIG_BRIDGE_MRP)
- struct list_head __rcu mrp_list;
+ struct list_head mrp_list;
#endif
};
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 33b255e38ffe..315eb37d89f0 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -8,7 +8,7 @@
struct br_mrp {
/* list of mrp instances */
- struct list_head __rcu list;
+ struct list_head list;
struct net_bridge_port __rcu *p_port;
struct net_bridge_port __rcu *s_port;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index c83ffe912163..b13b49b9f75c 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1047,7 +1047,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
vfree(counterstmp);
audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
- AUDIT_XT_OP_REPLACE);
+ AUDIT_XT_OP_REPLACE, GFP_KERNEL);
return ret;
free_unlock:
@@ -1123,7 +1123,7 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
list_del(&table->list);
mutex_unlock(&ebt_mutex);
audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries,
- AUDIT_XT_OP_UNREGISTER);
+ AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size,
ebt_cleanup_entry, net, NULL);
if (table->private->nentries)
@@ -1218,7 +1218,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
}
audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
- AUDIT_XT_OP_REGISTER);
+ AUDIT_XT_OP_REGISTER, GFP_KERNEL);
return ret;
free_unlock:
mutex_unlock(&ebt_mutex);
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 7c9e92b2f806..8e8ffac037cd 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -155,3 +155,4 @@ module_exit(nft_meta_bridge_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("wenxu <wenxu@ucloud.cn>");
MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
+MODULE_DESCRIPTION("Support for bridge dedicated meta key");
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index f48cf4cfb80f..deae2c9a0f69 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -455,3 +455,4 @@ module_exit(nft_reject_bridge_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "reject");
+MODULE_DESCRIPTION("Reject packets from bridge via nftables");
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index b7532a79ca7a..87205251cc25 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -7,7 +7,7 @@ menuconfig CAIF
tristate "CAIF support"
select CRC_CCITT
default n
- ---help---
+ help
The "Communication CPU to Application CPU Interface" (CAIF) is a packet
based connection-oriented MUX protocol developed by ST-Ericsson for use
with its modems. It is accessed from user space as sockets (PF_CAIF).
@@ -26,7 +26,7 @@ config CAIF_DEBUG
bool "Enable Debug"
depends on CAIF
default n
- ---help---
+ help
Enable the inclusion of debug code in the CAIF stack.
Be aware that doing this will impact performance.
If unsure say N.
@@ -35,7 +35,7 @@ config CAIF_NETDEV
tristate "CAIF GPRS Network device"
depends on CAIF
default CAIF
- ---help---
+ help
Say Y if you will be using a CAIF based GPRS network device.
This can be either built-in or a loadable module.
If you select to build it as a built-in then the main CAIF device must
@@ -46,7 +46,7 @@ config CAIF_USB
tristate "CAIF USB support"
depends on CAIF
default n
- ---help---
+ help
Say Y if you are using CAIF over USB CDC NCM.
This can be either built-in or a loadable module.
If you select to build it as a built-in then the main CAIF device must
diff --git a/net/can/Kconfig b/net/can/Kconfig
index d77042752457..25436a715db3 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -6,7 +6,7 @@
menuconfig CAN
depends on NET
tristate "CAN bus subsystem support"
- ---help---
+ help
Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
communications protocol. Development of the CAN bus started in
1983 at Robert Bosch GmbH, and the protocol was officially
@@ -23,7 +23,7 @@ if CAN
config CAN_RAW
tristate "Raw CAN Protocol (raw access with CAN-ID filtering)"
default y
- ---help---
+ help
The raw CAN protocol option offers access to the CAN bus via
the BSD socket API. You probably want to use the raw socket in
most cases where no higher level protocol is being used. The raw
@@ -33,7 +33,7 @@ config CAN_RAW
config CAN_BCM
tristate "Broadcast Manager CAN Protocol (with content filtering)"
default y
- ---help---
+ help
The Broadcast Manager offers content filtering, timeout monitoring,
sending of RTR frames, and cyclic CAN messages without permanent user
interaction. The BCM can be 'programmed' via the BSD socket API and
@@ -45,7 +45,7 @@ config CAN_BCM
config CAN_GW
tristate "CAN Gateway/Router (with netlink configuration)"
default y
- ---help---
+ help
The CAN Gateway/Router is used to route (and modify) CAN frames.
It is based on the PF_CAN core infrastructure for msg filtering and
msg sending and can optionally modify routed CAN frames on the fly.
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index afe0e8184c23..4e7edd707a14 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -332,6 +332,7 @@ struct ceph_options *ceph_alloc_options(void)
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
+ opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT;
return opt;
}
EXPORT_SYMBOL(ceph_alloc_options);
@@ -490,16 +491,13 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_read_from_replica:
switch (result.uint_32) {
case Opt_read_from_replica_no:
- opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS |
- CEPH_OSD_FLAG_LOCALIZE_READS);
+ opt->read_from_replica = 0;
break;
case Opt_read_from_replica_balance:
- opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS;
- opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS;
+ opt->read_from_replica = CEPH_OSD_FLAG_BALANCE_READS;
break;
case Opt_read_from_replica_localize:
- opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS;
- opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS;
+ opt->read_from_replica = CEPH_OSD_FLAG_LOCALIZE_READS;
break;
default:
BUG();
@@ -613,9 +611,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
}
seq_putc(m, ',');
}
- if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) {
+ if (opt->read_from_replica == CEPH_OSD_FLAG_BALANCE_READS) {
seq_puts(m, "read_from_replica=balance,");
- } else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) {
+ } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) {
seq_puts(m, "read_from_replica=localize,");
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4fea3c33af2a..2db8b44e70c2 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -445,8 +445,10 @@ static void target_copy(struct ceph_osd_request_target *dest,
dest->size = src->size;
dest->min_size = src->min_size;
dest->sort_bitwise = src->sort_bitwise;
+ dest->recovery_deletes = src->recovery_deletes;
dest->flags = src->flags;
+ dest->used_replica = src->used_replica;
dest->paused = src->paused;
dest->epoch = src->epoch;
@@ -1117,10 +1119,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
truncate_size, truncate_seq);
}
- req->r_flags = flags;
req->r_base_oloc.pool = layout->pool_id;
req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns);
ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum);
+ req->r_flags = flags | osdc->client->options->read_from_replica;
req->r_snapid = vino.snap;
if (flags & CEPH_OSD_FLAG_WRITE)
@@ -2431,14 +2433,11 @@ promote:
static void account_request(struct ceph_osd_request *req)
{
- struct ceph_osd_client *osdc = req->r_osdc;
-
WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK));
WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE)));
req->r_flags |= CEPH_OSD_FLAG_ONDISK;
- req->r_flags |= osdc->client->options->osd_req_flags;
- atomic_inc(&osdc->num_requests);
+ atomic_inc(&req->r_osdc->num_requests);
req->r_start_stamp = jiffies;
req->r_start_latency = ktime_get();
diff --git a/net/compat.c b/net/compat.c
index 5e3041a2c37d..77f3a0e98fd0 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -202,7 +202,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
/* Advance. */
kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp);
- ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
+ ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, cmsg.cmsg_len);
}
/*
@@ -281,39 +281,31 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
return 0;
}
-void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
+static int scm_max_fds_compat(struct msghdr *msg)
{
- struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
- int fdmax = (kmsg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
- int fdnum = scm->fp->count;
- struct file **fp = scm->fp->fp;
- int __user *cmfptr;
- int err = 0, i;
+ if (msg->msg_controllen <= sizeof(struct compat_cmsghdr))
+ return 0;
+ return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
+}
- if (fdnum < fdmax)
- fdmax = fdnum;
+void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
+{
+ struct compat_cmsghdr __user *cm =
+ (struct compat_cmsghdr __user *)msg->msg_control;
+ unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
+ int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count);
+ int __user *cmsg_data = CMSG_USER_DATA(cm);
+ int err = 0, i;
- for (i = 0, cmfptr = (int __user *) CMSG_COMPAT_DATA(cm); i < fdmax; i++, cmfptr++) {
- int new_fd;
- err = security_file_receive(fp[i]);
- if (err)
- break;
- err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & kmsg->msg_flags
- ? O_CLOEXEC : 0);
+ for (i = 0; i < fdmax; i++) {
+ err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err < 0)
break;
- new_fd = err;
- err = put_user(new_fd, cmfptr);
- if (err) {
- put_unused_fd(new_fd);
- break;
- }
- /* Bump the usage count and install the file. */
- fd_install(new_fd, get_file(fp[i]));
}
if (i > 0) {
int cmlen = CMSG_COMPAT_LEN(i * sizeof(int));
+
err = put_user(SOL_SOCKET, &cm->cmsg_level);
if (!err)
err = put_user(SCM_RIGHTS, &cm->cmsg_type);
@@ -321,16 +313,19 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_COMPAT_SPACE(i * sizeof(int));
- kmsg->msg_control += cmlen;
- kmsg->msg_controllen -= cmlen;
+ if (msg->msg_controllen < cmlen)
+ cmlen = msg->msg_controllen;
+ msg->msg_control += cmlen;
+ msg->msg_controllen -= cmlen;
}
}
- if (i < fdnum)
- kmsg->msg_flags |= MSG_CTRUNC;
+
+ if (i < scm->fp->count || (scm->fp->count && fdmax <= 0))
+ msg->msg_flags |= MSG_CTRUNC;
/*
- * All of the files that fit in the message have had their
- * usage counts incremented, so we just free the list.
+ * All of the files that fit in the message have had their usage counts
+ * incremented, so we just free the list.
*/
__scm_destroy(scm);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 061496a1f640..7a774ebf64e2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -439,6 +439,7 @@ static const char *const netdev_lock_name[] = {
"_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
@@ -460,11 +461,25 @@ static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
netdev_lock_name[i]);
}
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+ int i;
+
+ i = netdev_lock_pos(dev->type);
+ lockdep_set_class_and_name(&dev->addr_list_lock,
+ &netdev_addr_lock_key[i],
+ netdev_lock_name[i]);
+}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
unsigned short dev_type)
{
}
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+}
#endif
/*******************************************************************************
@@ -4177,10 +4192,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
local_bh_disable();
+ dev_xmit_recursion_inc();
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_drv_stopped(txq))
ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
+ dev_xmit_recursion_dec();
local_bh_enable();
@@ -5584,7 +5601,7 @@ static void flush_backlog(struct work_struct *work)
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
- kfree_skb(skb);
+ dev_kfree_skb_irq(skb);
input_queue_head_incr(sd);
}
}
@@ -9373,15 +9390,6 @@ void netif_tx_stop_all_queues(struct net_device *dev)
}
EXPORT_SYMBOL(netif_tx_stop_all_queues);
-void netdev_update_lockdep_key(struct net_device *dev)
-{
- lockdep_unregister_key(&dev->addr_list_lock_key);
- lockdep_register_key(&dev->addr_list_lock_key);
-
- lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
-}
-EXPORT_SYMBOL(netdev_update_lockdep_key);
-
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -9420,7 +9428,7 @@ int register_netdevice(struct net_device *dev)
return ret;
spin_lock_init(&dev->addr_list_lock);
- lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
+ netdev_set_addr_lockdep_class(dev);
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
@@ -9541,6 +9549,13 @@ int register_netdevice(struct net_device *dev)
rcu_barrier();
dev->reg_state = NETREG_UNREGISTERED;
+ /* We should put the kobject that hold in
+ * netdev_unregister_kobject(), otherwise
+ * the net device cannot be freed when
+ * driver calls free_netdev(), because the
+ * kobject is being hold.
+ */
+ kobject_put(&dev->dev.kobj);
}
/*
* Prevent userspace races by waiting until the network
@@ -9939,8 +9954,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- lockdep_register_key(&dev->addr_list_lock_key);
-
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->upper_level = 1;
@@ -10028,8 +10041,6 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
- lockdep_unregister_key(&dev->addr_list_lock_key);
-
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 2f949b5a1eb9..54cd568e7c2f 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -690,8 +690,17 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return;
+ /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two
+ * reasons:
+ * 1) This is always called without any addr_list_lock, so as the
+ * outermost one here, it must be 0.
+ * 2) This is called by some callers after unlinking the upper device,
+ * so the dev->lower_level becomes 1 again.
+ * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or
+ * larger.
+ */
netif_addr_lock_bh(from);
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
@@ -858,7 +867,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -888,7 +897,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -911,8 +920,9 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return;
+ /* See the above comments inside dev_uc_unsync(). */
netif_addr_lock_bh(from);
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 2cafbc808b09..47f14a2f25fb 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1065,7 +1065,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP) {
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
mutex_unlock(&devlink->lock);
goto out;
}
@@ -1266,7 +1268,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
devlink, devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP) {
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
mutex_unlock(&devlink->lock);
goto out;
}
@@ -1498,7 +1502,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP) {
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
mutex_unlock(&devlink->lock);
goto out;
}
@@ -3299,7 +3305,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
- if (err && err != -EOPNOTSUPP) {
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
mutex_unlock(&devlink->lock);
goto out;
}
@@ -3569,7 +3577,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
- if (err && err != -EOPNOTSUPP) {
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
mutex_unlock(&devlink->lock);
goto out;
}
@@ -4518,7 +4528,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->extack);
mutex_unlock(&devlink->lock);
- if (err && err != -EOPNOTSUPP)
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ else if (err)
break;
idx++;
}
@@ -8567,6 +8579,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = {
DEVLINK_TRAP_GROUP(PIM),
DEVLINK_TRAP_GROUP(UC_LB),
DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
+ DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY),
DEVLINK_TRAP_GROUP(IPV6),
DEVLINK_TRAP_GROUP(PTP_EVENT),
DEVLINK_TRAP_GROUP(PTP_GENERAL),
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 2ee7bc4c9e03..b09bebeadf0b 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1721,3 +1721,4 @@ module_exit(exit_net_drop_monitor);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>");
MODULE_ALIAS_GENL_FAMILY("NET_DM");
+MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts");
diff --git a/net/core/filter.c b/net/core/filter.c
index 209482a4eaa2..82e1b5b06167 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1755,25 +1755,27 @@ BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header)
{
u8 *end = skb_tail_pointer(skb);
- u8 *net = skb_network_header(skb);
- u8 *mac = skb_mac_header(skb);
- u8 *ptr;
+ u8 *start, *ptr;
- if (unlikely(offset > 0xffff || len > (end - mac)))
+ if (unlikely(offset > 0xffff))
goto err_clear;
switch (start_header) {
case BPF_HDR_START_MAC:
- ptr = mac + offset;
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ goto err_clear;
+ start = skb_mac_header(skb);
break;
case BPF_HDR_START_NET:
- ptr = net + offset;
+ start = skb_network_header(skb);
break;
default:
goto err_clear;
}
- if (likely(ptr >= mac && ptr + len <= end)) {
+ ptr = start + offset;
+
+ if (likely(ptr + len <= end)) {
memcpy(to, ptr, len);
return 0;
}
@@ -4340,8 +4342,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
break;
case SO_BINDTODEVICE:
- ret = -ENOPROTOOPT;
-#ifdef CONFIG_NETDEVICES
optlen = min_t(long, optlen, IFNAMSIZ - 1);
strncpy(devname, optval, optlen);
devname[optlen] = 0;
@@ -4360,7 +4360,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
dev_put(dev);
}
ret = sock_bindtoindex(sk, ifindex, false);
-#endif
break;
default:
ret = -EINVAL;
@@ -5854,12 +5853,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
{
unsigned int iphdr_len;
- if (skb->protocol == cpu_to_be16(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
iphdr_len = sizeof(struct iphdr);
- else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
+ break;
+ case cpu_to_be16(ETH_P_IPV6):
iphdr_len = sizeof(struct ipv6hdr);
- else
+ break;
+ default:
return 0;
+ }
if (skb_headlen(skb) < iphdr_len)
return 0;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d02df0b6d0d9..142a8824f0a8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -70,10 +70,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
EXPORT_SYMBOL(skb_flow_dissector_init);
#ifdef CONFIG_BPF_SYSCALL
-int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog)
+int flow_dissector_bpf_prog_attach_check(struct net *net,
+ struct bpf_prog *prog)
{
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
- struct bpf_prog *attached;
if (net == &init_net) {
/* BPF flow dissector in the root namespace overrides
@@ -86,26 +86,17 @@ int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog)
for_each_net(ns) {
if (ns == &init_net)
continue;
- if (rcu_access_pointer(ns->bpf.progs[type]))
+ if (rcu_access_pointer(ns->bpf.run_array[type]))
return -EEXIST;
}
} else {
/* Make sure root flow dissector is not attached
* when attaching to the non-root namespace.
*/
- if (rcu_access_pointer(init_net.bpf.progs[type]))
+ if (rcu_access_pointer(init_net.bpf.run_array[type]))
return -EEXIST;
}
- attached = rcu_dereference_protected(net->bpf.progs[type],
- lockdep_is_held(&netns_bpf_mutex));
- if (attached == prog)
- /* The same program cannot be attached twice */
- return -EINVAL;
-
- rcu_assign_pointer(net->bpf.progs[type], prog);
- if (attached)
- bpf_prog_put(attached);
return 0;
}
#endif /* CONFIG_BPF_SYSCALL */
@@ -903,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net,
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
- struct bpf_prog *attached = NULL;
enum flow_dissect_ret fdret;
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
bool mpls_el = false;
@@ -960,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net);
if (net) {
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
+ struct bpf_prog_array *run_array;
rcu_read_lock();
- attached = rcu_dereference(init_net.bpf.progs[type]);
-
- if (!attached)
- attached = rcu_dereference(net->bpf.progs[type]);
+ run_array = rcu_dereference(init_net.bpf.run_array[type]);
+ if (!run_array)
+ run_array = rcu_dereference(net->bpf.run_array[type]);
- if (attached) {
+ if (run_array) {
struct bpf_flow_keys flow_keys;
struct bpf_flow_dissector ctx = {
.flow_keys = &flow_keys,
@@ -975,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net,
.data_end = data + hlen,
};
__be16 n_proto = proto;
+ struct bpf_prog *prog;
if (skb) {
ctx.skb = skb;
@@ -985,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net,
n_proto = skb->protocol;
}
- ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
+ prog = READ_ONCE(run_array->items[0].prog);
+ ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
hlen, flags);
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
target_container);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 0cfc35e6be28..2076219b8ba5 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -4,6 +4,7 @@
#include <net/flow_offload.h>
#include <linux/rtnetlink.h>
#include <linux/mutex.h>
+#include <linux/rhashtable.h>
struct flow_rule *flow_rule_alloc(unsigned int num_actions)
{
@@ -372,14 +373,15 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
}
EXPORT_SYMBOL(flow_indr_dev_register);
-static void __flow_block_indr_cleanup(flow_setup_cb_t *setup_cb, void *cb_priv,
+static void __flow_block_indr_cleanup(void (*release)(void *cb_priv),
+ void *cb_priv,
struct list_head *cleanup_list)
{
struct flow_block_cb *this, *next;
list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) {
- if (this->cb == setup_cb &&
- this->cb_priv == cb_priv) {
+ if (this->release == release &&
+ this->indr.cb_priv == cb_priv) {
list_move(&this->indr.list, cleanup_list);
return;
}
@@ -397,7 +399,7 @@ static void flow_block_indr_notify(struct list_head *cleanup_list)
}
void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv,
- flow_setup_cb_t *setup_cb)
+ void (*release)(void *cb_priv))
{
struct flow_indr_dev *this, *next, *indr_dev = NULL;
LIST_HEAD(cleanup_list);
@@ -418,7 +420,7 @@ void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv,
return;
}
- __flow_block_indr_cleanup(setup_cb, cb_priv, &cleanup_list);
+ __flow_block_indr_cleanup(release, cb_priv, &cleanup_list);
mutex_unlock(&flow_indr_block_lock);
flow_block_indr_notify(&cleanup_list);
@@ -429,32 +431,37 @@ EXPORT_SYMBOL(flow_indr_dev_unregister);
static void flow_block_indr_init(struct flow_block_cb *flow_block,
struct flow_block_offload *bo,
struct net_device *dev, void *data,
+ void *cb_priv,
void (*cleanup)(struct flow_block_cb *block_cb))
{
flow_block->indr.binder_type = bo->binder_type;
flow_block->indr.data = data;
+ flow_block->indr.cb_priv = cb_priv;
flow_block->indr.dev = dev;
flow_block->indr.cleanup = cleanup;
}
-static void __flow_block_indr_binding(struct flow_block_offload *bo,
- struct net_device *dev, void *data,
- void (*cleanup)(struct flow_block_cb *block_cb))
+struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb,
+ void *cb_ident, void *cb_priv,
+ void (*release)(void *cb_priv),
+ struct flow_block_offload *bo,
+ struct net_device *dev, void *data,
+ void *indr_cb_priv,
+ void (*cleanup)(struct flow_block_cb *block_cb))
{
struct flow_block_cb *block_cb;
- list_for_each_entry(block_cb, &bo->cb_list, list) {
- switch (bo->command) {
- case FLOW_BLOCK_BIND:
- flow_block_indr_init(block_cb, bo, dev, data, cleanup);
- list_add(&block_cb->indr.list, &flow_block_indr_list);
- break;
- case FLOW_BLOCK_UNBIND:
- list_del(&block_cb->indr.list);
- break;
- }
- }
+ block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, release);
+ if (IS_ERR(block_cb))
+ goto out;
+
+ flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup);
+ list_add(&block_cb->indr.list, &flow_block_indr_list);
+
+out:
+ return block_cb;
}
+EXPORT_SYMBOL(flow_indr_block_cb_alloc);
int flow_indr_dev_setup_offload(struct net_device *dev,
enum tc_setup_type type, void *data,
@@ -465,9 +472,8 @@ int flow_indr_dev_setup_offload(struct net_device *dev,
mutex_lock(&flow_indr_block_lock);
list_for_each_entry(this, &flow_block_indr_dev_list, list)
- this->cb(dev, this->cb_priv, type, bo);
+ this->cb(dev, this->cb_priv, type, bo, data, cleanup);
- __flow_block_indr_binding(bo, dev, data, cleanup);
mutex_unlock(&flow_indr_block_lock);
return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e353b822bb15..9de33b594ff2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -11,6 +11,7 @@
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
+#include <linux/sched/isolation.h>
#include <linux/nsproxy.h>
#include <net/sock.h>
#include <net/net_namespace.h>
@@ -741,7 +742,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
{
struct rps_map *old_map, *map;
cpumask_var_t mask;
- int err, cpu, i;
+ int err, cpu, i, hk_flags;
static DEFINE_MUTEX(rps_map_mutex);
if (!capable(CAP_NET_ADMIN))
@@ -756,6 +757,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
return err;
}
+ hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
+ cpumask_and(mask, mask, housekeeping_cpumask(hk_flags));
+ if (cpumask_empty(mask)) {
+ free_cpumask_var(mask);
+ return -EINVAL;
+ }
+
map = kzalloc(max_t(unsigned int,
RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
GFP_KERNEL);
@@ -1108,7 +1116,7 @@ static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
trans_timeout = queue->trans_timeout;
spin_unlock_irq(&queue->_xmit_lock);
- return sprintf(buf, "%lu", trans_timeout);
+ return sprintf(buf, fmt_ulong, trans_timeout);
}
static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2269199c5891..85a4b0101f76 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2462,7 +2462,6 @@ static int do_set_master(struct net_device *dev, int ifindex,
err = ops->ndo_del_slave(upper_dev, dev);
if (err)
return err;
- netdev_update_lockdep_key(dev);
} else {
return -EOPNOTSUPP;
}
@@ -3344,7 +3343,8 @@ replay:
*/
if (err < 0) {
/* If device is not registered at all, free it now */
- if (dev->reg_state == NETREG_UNINITIALIZED)
+ if (dev->reg_state == NETREG_UNINITIALIZED ||
+ dev->reg_state == NETREG_UNREGISTERED)
free_netdev(dev);
goto out;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 875df1c2989d..8156d4fb8a39 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -280,36 +280,6 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter
}
EXPORT_SYMBOL(put_cmsg_scm_timestamping);
-static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags)
-{
- struct socket *sock;
- int new_fd;
- int error;
-
- error = security_file_receive(file);
- if (error)
- return error;
-
- new_fd = get_unused_fd_flags(o_flags);
- if (new_fd < 0)
- return new_fd;
-
- error = put_user(new_fd, ufd);
- if (error) {
- put_unused_fd(new_fd);
- return error;
- }
-
- /* Bump the usage count and install the file. */
- sock = sock_from_file(file, &error);
- if (sock) {
- sock_update_netprioidx(&sock->sk->sk_cgrp_data);
- sock_update_classid(&sock->sk->sk_cgrp_data);
- }
- fd_install(new_fd, get_file(file));
- return 0;
-}
-
static int scm_max_fds(struct msghdr *msg)
{
if (msg->msg_controllen <= sizeof(struct cmsghdr))
@@ -319,29 +289,29 @@ static int scm_max_fds(struct msghdr *msg)
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
- struct cmsghdr __user *cm
- = (__force struct cmsghdr __user*)msg->msg_control;
- int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
+ struct cmsghdr __user *cm =
+ (__force struct cmsghdr __user *)msg->msg_control;
+ unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count);
int __user *cmsg_data = CMSG_USER_DATA(cm);
int err = 0, i;
+ /* no use for FD passing from kernel space callers */
+ if (WARN_ON_ONCE(!msg->msg_control_is_user))
+ return;
+
if (msg->msg_flags & MSG_CMSG_COMPAT) {
scm_detach_fds_compat(msg, scm);
return;
}
- /* no use for FD passing from kernel space callers */
- if (WARN_ON_ONCE(!msg->msg_control_is_user))
- return;
-
for (i = 0; i < fdmax; i++) {
- err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
- if (err)
+ err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+ if (err < 0)
break;
}
- if (i > 0) {
+ if (i > 0) {
int cmlen = CMSG_LEN(i * sizeof(int));
err = put_user(SOL_SOCKET, &cm->cmsg_level);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 351afbf6bfba..6a32a1fd34f8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -683,7 +683,7 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
return container_of(parser, struct sk_psock, parser);
}
-static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
+static void sk_psock_skb_redirect(struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
@@ -715,12 +715,11 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
}
}
-static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
- struct sk_buff *skb, int verdict)
+static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict)
{
switch (verdict) {
case __SK_REDIRECT:
- sk_psock_skb_redirect(psock, skb);
+ sk_psock_skb_redirect(skb);
break;
case __SK_PASS:
case __SK_DROP:
@@ -741,8 +740,8 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
}
+ sk_psock_tls_verdict_apply(skb, ret);
rcu_read_unlock();
- sk_psock_tls_verdict_apply(psock, skb, ret);
return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
@@ -770,7 +769,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
}
goto out_free;
case __SK_REDIRECT:
- sk_psock_skb_redirect(psock, skb);
+ sk_psock_skb_redirect(skb);
break;
case __SK_DROP:
/* fall-through */
@@ -782,11 +781,18 @@ out_free:
static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
{
- struct sk_psock *psock = sk_psock_from_strp(strp);
+ struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
+ struct sock *sk;
rcu_read_lock();
+ sk = strp->sk;
+ psock = sk_psock(sk);
+ if (unlikely(!psock)) {
+ kfree_skb(skb);
+ goto out;
+ }
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
skb_orphan(skb);
@@ -794,8 +800,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
}
- rcu_read_unlock();
sk_psock_verdict_apply(psock, skb, ret);
+out:
+ rcu_read_unlock();
}
static int sk_psock_strp_read_done(struct strparser *strp, int err)
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c4acf1f0220..8ccdcdaaa673 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -718,7 +718,7 @@ bool sk_mc_loop(struct sock *sk)
return inet6_sk(sk)->mc_loop;
#endif
}
- WARN_ON(1);
+ WARN_ON_ONCE(1);
return true;
}
EXPORT_SYMBOL(sk_mc_loop);
@@ -1767,6 +1767,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
+ sk_tx_queue_clear(sk);
}
return sk;
@@ -1925,7 +1926,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
/* sk->sk_memcg will be populated at accept() time */
newsk->sk_memcg = NULL;
- cgroup_sk_alloc(&newsk->sk_cgrp_data);
+ cgroup_sk_clone(&newsk->sk_cgrp_data);
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
@@ -1972,7 +1973,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
/*
* Before updating sk_refcnt, we must commit prior changes to memory
- * (Documentation/RCU/rculist_nulls.txt for details)
+ * (Documentation/RCU/rculist_nulls.rst for details)
*/
smp_wmb();
refcount_set(&newsk->sk_refcnt, 2);
@@ -1990,6 +1991,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
sk_refcnt_debug_inc(newsk);
sk_set_socket(newsk, NULL);
+ sk_tx_queue_clear(newsk);
RCU_INIT_POINTER(newsk->sk_wq, NULL);
if (newsk->sk_prot->sockets_allocated)
@@ -2840,6 +2842,27 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *
}
EXPORT_SYMBOL(sock_no_mmap);
+/*
+ * When a file is received (via SCM_RIGHTS, etc), we must bump the
+ * various sock-based usage counts.
+ */
+void __receive_sock(struct file *file)
+{
+ struct socket *sock;
+ int error;
+
+ /*
+ * The resulting value of "error" is ignored here since we only
+ * need to take action when the file is a socket and testing
+ * "sock" for NULL is sufficient.
+ */
+ sock = sock_from_file(file, &error);
+ if (sock) {
+ sock_update_netprioidx(&sock->sk->sk_cgrp_data);
+ sock_update_classid(&sock->sk->sk_cgrp_data);
+ }
+}
+
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
ssize_t res;
@@ -3033,7 +3056,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk_rx_queue_clear(sk);
/*
* Before updating sk_refcnt, we must commit prior changes to memory
- * (Documentation/RCU/rculist_nulls.txt for details)
+ * (Documentation/RCU/rculist_nulls.rst for details)
*/
smp_wmb();
refcount_set(&sk->sk_refcnt, 1);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 00a26cf2cfe9..0971f17e8e54 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -70,11 +70,49 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
struct fd f;
int ret;
+ if (attr->attach_flags || attr->replace_bpf_fd)
+ return -EINVAL;
+
f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- ret = sock_map_prog_update(map, prog, attr->attach_type);
+ ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
+ fdput(f);
+ return ret;
+}
+
+int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+ u32 ufd = attr->target_fd;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ int ret;
+
+ if (attr->attach_flags || attr->replace_bpf_fd)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ prog = bpf_prog_get(attr->attach_bpf_fd);
+ if (IS_ERR(prog)) {
+ ret = PTR_ERR(prog);
+ goto put_map;
+ }
+
+ if (prog->type != ptype) {
+ ret = -EINVAL;
+ goto put_prog;
+ }
+
+ ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
+put_prog:
+ bpf_prog_put(prog);
+put_map:
fdput(f);
return ret;
}
@@ -424,10 +462,7 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
return 0;
}
-static bool sock_map_redirect_allowed(const struct sock *sk)
-{
- return sk->sk_state != TCP_LISTEN;
-}
+static bool sock_map_redirect_allowed(const struct sock *sk);
static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
@@ -508,6 +543,11 @@ static bool sk_is_udp(const struct sock *sk)
sk->sk_protocol == IPPROTO_UDP;
}
+static bool sock_map_redirect_allowed(const struct sock *sk)
+{
+ return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
+}
+
static bool sock_map_sk_is_suitable(const struct sock *sk)
{
return sk_is_tcp(sk) || sk_is_udp(sk);
@@ -989,11 +1029,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
err = -EINVAL;
goto free_htab;
}
+ err = bpf_map_charge_init(&htab->map.memory, cost);
+ if (err)
+ goto free_htab;
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
sizeof(struct bpf_htab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
+ bpf_map_charge_finish(&htab->map.memory);
err = -ENOMEM;
goto free_htab;
}
@@ -1013,6 +1057,7 @@ static void sock_hash_free(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct bpf_htab_bucket *bucket;
+ struct hlist_head unlink_list;
struct bpf_htab_elem *elem;
struct hlist_node *node;
int i;
@@ -1024,13 +1069,32 @@ static void sock_hash_free(struct bpf_map *map)
synchronize_rcu();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
- hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
- hlist_del_rcu(&elem->node);
+
+ /* We are racing with sock_hash_delete_from_link to
+ * enter the spin-lock critical section. Every socket on
+ * the list is still linked to sockhash. Since link
+ * exists, psock exists and holds a ref to socket. That
+ * lets us to grab a socket ref too.
+ */
+ raw_spin_lock_bh(&bucket->lock);
+ hlist_for_each_entry(elem, &bucket->head, node)
+ sock_hold(elem->sk);
+ hlist_move_list(&bucket->head, &unlink_list);
+ raw_spin_unlock_bh(&bucket->lock);
+
+ /* Process removed entries out of atomic context to
+ * block for socket lock before deleting the psock's
+ * link to sockhash.
+ */
+ hlist_for_each_entry_safe(elem, node, &unlink_list, node) {
+ hlist_del(&elem->node);
lock_sock(elem->sk);
rcu_read_lock();
sock_map_unref(elem->sk, elem);
rcu_read_unlock();
release_sock(elem->sk);
+ sock_put(elem->sk);
+ sock_hash_free_elem(htab, elem);
}
}
@@ -1177,27 +1241,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
}
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- u32 which)
+ struct bpf_prog *old, u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
+ struct bpf_prog **pprog;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- psock_set_prog(&progs->msg_parser, prog);
+ pprog = &progs->msg_parser;
break;
case BPF_SK_SKB_STREAM_PARSER:
- psock_set_prog(&progs->skb_parser, prog);
+ pprog = &progs->skb_parser;
break;
case BPF_SK_SKB_STREAM_VERDICT:
- psock_set_prog(&progs->skb_verdict, prog);
+ pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}
+ if (old)
+ return psock_replace_prog(pprog, prog, old);
+
+ psock_set_prog(pprog, prog);
return 0;
}
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index adcb3aea576d..bbdd3c7b6cb5 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -101,6 +101,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
more_reuse->prog = reuse->prog;
more_reuse->reuseport_id = reuse->reuseport_id;
more_reuse->bind_inany = reuse->bind_inany;
+ more_reuse->has_conns = reuse->has_conns;
memcpy(more_reuse->socks, reuse->socks,
reuse->num_socks * sizeof(struct sock *));
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f93f8ace6c56..6ada114bbcca 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -274,7 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
if (jit_enable < 2 ||
- (jit_enable == 2 && bpf_dump_raw_ok())) {
+ (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
*(int *)table->data = jit_enable;
if (jit_enable == 2)
pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 90f44f382115..3c45f99e26d5 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
xdpf->len = totsize - metasize;
xdpf->headroom = 0;
xdpf->metasize = metasize;
+ xdpf->frame_sz = PAGE_SIZE;
xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
xsk_buff_free(xdp);
diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig
index 917e6e7b1cac..efee8b9fe1d4 100644
--- a/net/dcb/Kconfig
+++ b/net/dcb/Kconfig
@@ -2,7 +2,7 @@
config DCB
bool "Data Center Bridging support"
default n
- ---help---
+ help
This enables support for configuring Data Center Bridging (DCB)
features on DCB capable Ethernet adapters via rtnetlink. Say 'Y'
if you have a DCB capable Ethernet adapter which supports this
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index f7c7495677b0..51ac2631fb48 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -2,7 +2,7 @@
menuconfig IP_DCCP
tristate "The DCCP Protocol"
depends on INET
- ---help---
+ help
Datagram Congestion Control Protocol (RFC 4340)
From http://www.ietf.org/rfc/rfc4340.txt:
@@ -32,7 +32,7 @@ menu "DCCP Kernel Hacking"
config IP_DCCP_DEBUG
bool "DCCP debug messages"
- ---help---
+ help
Only use this if you're hacking DCCP.
When compiling DCCP as a module, this debugging output can be toggled
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 4a358e6847a8..4d7771f36eff 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -3,7 +3,7 @@ menu "DCCP CCIDs Configuration"
config IP_DCCP_CCID2_DEBUG
bool "CCID-2 debugging messages"
- ---help---
+ help
Enable CCID-2 specific debugging messages.
The debugging output can additionally be toggled by setting the
@@ -14,7 +14,7 @@ config IP_DCCP_CCID2_DEBUG
config IP_DCCP_CCID3
bool "CCID-3 (TCP-Friendly)"
def_bool y if (IP_DCCP = y || IP_DCCP = m)
- ---help---
+ help
CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
be reasonably fair when competing for bandwidth with TCP-like flows,
@@ -39,7 +39,7 @@ config IP_DCCP_CCID3
config IP_DCCP_CCID3_DEBUG
bool "CCID-3 debugging messages"
depends on IP_DCCP_CCID3
- ---help---
+ help
Enable CCID-3 specific debugging messages.
The debugging output can additionally be toggled by setting the
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 3b42f5c6a63d..9fed0ae21e63 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -56,7 +56,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
(dh->dccph_doff * 4);
struct dccp_options_received *opt_recv = &dp->dccps_options_received;
unsigned char opt, len;
- unsigned char *uninitialized_var(value);
+ unsigned char *value;
u32 elapsed_time;
__be32 opt_val;
int rc;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 4af8a98fe784..c13b6609474b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1139,14 +1139,14 @@ static int __init dccp_init(void)
inet_hashinfo_init(&dccp_hashinfo);
rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
if (rc)
- goto out_fail;
+ goto out_free_percpu;
rc = -ENOBUFS;
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
- goto out_free_percpu;
+ goto out_free_hashinfo2;
/*
* Size and allocate the main established and bind bucket
@@ -1242,6 +1242,8 @@ out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+out_free_hashinfo2:
+ inet_hashinfo2_free_mod(&dccp_hashinfo);
out_free_percpu:
percpu_counter_destroy(&dccp_orphan_count);
out_fail:
@@ -1265,6 +1267,7 @@ static void __exit dccp_fini(void)
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_ackvec_exit();
dccp_sysctl_exit();
+ inet_hashinfo2_free_mod(&dccp_hashinfo);
percpu_counter_destroy(&dccp_orphan_count);
}
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 8f98fb2f2ec9..24336bdb1054 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -4,7 +4,7 @@
#
config DECNET
tristate "DECnet Support"
- ---help---
+ help
The DECnet networking protocol was used in many products made by
Digital (now Compaq). It provides reliable stream and sequenced
packet communications over which run a variety of services similar
@@ -29,7 +29,7 @@ config DECNET_ROUTER
bool "DECnet: router support"
depends on DECNET
select FIB_RULES
- ---help---
+ help
Add support for turning your DECnet Endnode into a level 1 or 2
router. This is an experimental, but functional option. If you
do say Y here, then make sure that you also say Y to "Kernel/User
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 739613070d07..d5bc6ac599ef 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -13,7 +13,7 @@ menuconfig NET_DSA
select NET_SWITCHDEV
select PHYLINK
select NET_DEVLINK
- ---help---
+ help
Say Y if you want to enable support for the hardware switches supported
by the Distributed Switch Architecture.
diff --git a/net/dsa/master.c b/net/dsa/master.c
index a621367c6e8c..480a61460c23 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -327,6 +327,8 @@ static void dsa_master_reset_mtu(struct net_device *dev)
rtnl_unlock();
}
+static struct lock_class_key dsa_master_addr_list_lock_key;
+
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
int ret;
@@ -345,6 +347,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
wmb();
dev->dsa_ptr = cpu_dp;
+ lockdep_set_class(&dev->addr_list_lock,
+ &dsa_master_addr_list_lock_key);
ret = dsa_master_ethtool_setup(dev);
if (ret)
return ret;
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index e8eaa804ccb9..d6200ff98200 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -13,6 +13,16 @@
#define DSA_HLEN 4
#define EDSA_HLEN 8
+#define FRAME_TYPE_TO_CPU 0x00
+#define FRAME_TYPE_FORWARD 0x03
+
+#define TO_CPU_CODE_MGMT_TRAP 0x00
+#define TO_CPU_CODE_FRAME2REG 0x01
+#define TO_CPU_CODE_IGMP_MLD_TRAP 0x02
+#define TO_CPU_CODE_POLICY_TRAP 0x03
+#define TO_CPU_CODE_ARP_MIRROR 0x04
+#define TO_CPU_CODE_POLICY_MIRROR 0x05
+
static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -77,6 +87,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
u8 *edsa_header;
+ int frame_type;
+ int code;
int source_device;
int source_port;
@@ -91,8 +103,29 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
/*
* Check that frame type is either TO_CPU or FORWARD.
*/
- if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0)
+ frame_type = edsa_header[0] >> 6;
+
+ switch (frame_type) {
+ case FRAME_TYPE_TO_CPU:
+ code = (edsa_header[1] & 0x6) | ((edsa_header[2] >> 4) & 1);
+
+ /*
+ * Mark the frame to never egress on any port of the same switch
+ * unless it's a trapped IGMP/MLD packet, in which case the
+ * bridge might want to forward it.
+ */
+ if (code != TO_CPU_CODE_IGMP_MLD_TRAP)
+ skb->offload_fwd_mark = 1;
+
+ break;
+
+ case FRAME_TYPE_FORWARD:
+ skb->offload_fwd_mark = 1;
+ break;
+
+ default:
return NULL;
+ }
/*
* Determine source device and port.
@@ -156,8 +189,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->offload_fwd_mark = 1;
-
return skb;
}
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 7b7a0456c15c..7194956aa09e 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -234,6 +234,14 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1];
int ret;
+ cfg->first = 100;
+ cfg->step = 100;
+ cfg->last = MAX_CABLE_LENGTH_CM;
+ cfg->pair = PHY_PAIR_ALL;
+
+ if (!nest)
+ return 0;
+
ret = nla_parse_nested(tb, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, nest,
cable_test_tdr_act_cfg_policy, info->extack);
if (ret < 0)
@@ -242,17 +250,12 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST])
cfg->first = nla_get_u32(
tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]);
- else
- cfg->first = 100;
+
if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST])
cfg->last = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]);
- else
- cfg->last = MAX_CABLE_LENGTH_CM;
if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP])
cfg->step = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]);
- else
- cfg->step = 100;
if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]) {
cfg->pair = nla_get_u8(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]);
@@ -263,8 +266,6 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
"invalid pair parameter");
return -EINVAL;
}
- } else {
- cfg->pair = PHY_PAIR_ALL;
}
if (cfg->first > MAX_CABLE_LENGTH_CM) {
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 423e640e3876..aaecfc916a4d 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -40,9 +40,11 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
+ [NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-tunnel-remcsum-segmentation",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
[NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
+ [NETIF_F_GSO_FRAGLIST_BIT] = "tx-gso-list",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index b5df90c981c2..21d5fc0f6bb3 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2978,7 +2978,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
sizeof(match->mask.ipv6.dst));
}
if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) ||
- memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
+ memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) {
match->dissector.used_keys |=
BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 7f47ba89054e..afe5ac8a0f00 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -78,19 +78,18 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
ret = linkstate_get_sqi(dev);
if (ret < 0 && ret != -EOPNOTSUPP)
- return ret;
-
+ goto out;
data->sqi = ret;
ret = linkstate_get_sqi_max(dev);
if (ret < 0 && ret != -EOPNOTSUPP)
- return ret;
-
+ goto out;
data->sqi_max = ret;
+ ret = 0;
+out:
ethnl_ops_complete(dev);
-
- return 0;
+ return ret;
}
static int linkstate_reply_size(const struct ethnl_req_info *req_base,
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 88fd07f47040..dd8a1c1dc07d 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -376,10 +376,17 @@ err_dev:
}
static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
- const struct ethnl_dump_ctx *ctx)
+ const struct ethnl_dump_ctx *ctx,
+ struct netlink_callback *cb)
{
+ void *ehdr;
int ret;
+ ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &ethtool_genl_family, 0, ctx->ops->reply_cmd);
+ if (!ehdr)
+ return -EMSGSIZE;
+
ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev);
rtnl_lock();
ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL);
@@ -395,6 +402,10 @@ out:
if (ctx->ops->cleanup_data)
ctx->ops->cleanup_data(ctx->reply_data);
ctx->reply_data->dev = NULL;
+ if (ret < 0)
+ genlmsg_cancel(skb, ehdr);
+ else
+ genlmsg_end(skb, ehdr);
return ret;
}
@@ -411,7 +422,6 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
int s_idx = ctx->pos_idx;
int h, idx = 0;
int ret = 0;
- void *ehdr;
rtnl_lock();
for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
@@ -431,26 +441,15 @@ restart_chain:
dev_hold(dev);
rtnl_unlock();
- ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- &ethtool_genl_family, 0,
- ctx->ops->reply_cmd);
- if (!ehdr) {
- dev_put(dev);
- ret = -EMSGSIZE;
- goto out;
- }
- ret = ethnl_default_dump_one(skb, dev, ctx);
+ ret = ethnl_default_dump_one(skb, dev, ctx, cb);
dev_put(dev);
if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
if (ret == -EOPNOTSUPP)
goto lock_and_cont;
if (likely(skb->len))
ret = skb->len;
goto out;
}
- genlmsg_end(skb, ehdr);
lock_and_cont:
rtnl_lock();
if (net->dev_base_seq != seq) {
diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig
index 9c58f8763997..8095b034e76e 100644
--- a/net/hsr/Kconfig
+++ b/net/hsr/Kconfig
@@ -5,7 +5,7 @@
config HSR
tristate "High-availability Seamless Redundancy (HSR)"
- ---help---
+ help
If you say Y here, then your Linux box will be able to act as a
DANH ("Doubly attached node implementing HSR"). For this to work,
your Linux box needs (at least) two physical Ethernet interfaces,
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index cd99f548e440..a6f4e9f65b14 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -339,7 +339,7 @@ static void hsr_announce(struct timer_list *t)
rcu_read_unlock();
}
-static void hsr_del_ports(struct hsr_priv *hsr)
+void hsr_del_ports(struct hsr_priv *hsr)
{
struct hsr_port *port;
@@ -356,31 +356,12 @@ static void hsr_del_ports(struct hsr_priv *hsr)
hsr_del_port(port);
}
-/* This has to be called after all the readers are gone.
- * Otherwise we would have to check the return value of
- * hsr_port_get_hsr().
- */
-static void hsr_dev_destroy(struct net_device *hsr_dev)
-{
- struct hsr_priv *hsr = netdev_priv(hsr_dev);
-
- hsr_debugfs_term(hsr);
- hsr_del_ports(hsr);
-
- del_timer_sync(&hsr->prune_timer);
- del_timer_sync(&hsr->announce_timer);
-
- hsr_del_self_node(hsr);
- hsr_del_nodes(&hsr->node_db);
-}
-
static const struct net_device_ops hsr_device_ops = {
.ndo_change_mtu = hsr_dev_change_mtu,
.ndo_open = hsr_dev_open,
.ndo_stop = hsr_dev_close,
.ndo_start_xmit = hsr_dev_xmit,
.ndo_fix_features = hsr_fix_features,
- .ndo_uninit = hsr_dev_destroy,
};
static struct device_type hsr_type = {
@@ -434,6 +415,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
unsigned char multicast_spec, u8 protocol_version,
struct netlink_ext_ack *extack)
{
+ bool unregister = false;
struct hsr_priv *hsr;
int res;
@@ -485,25 +467,27 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
if (res)
goto err_unregister;
+ unregister = true;
+
res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A, extack);
if (res)
- goto err_add_slaves;
+ goto err_unregister;
res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B, extack);
if (res)
- goto err_add_slaves;
+ goto err_unregister;
hsr_debugfs_init(hsr, hsr_dev);
mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
return 0;
-err_add_slaves:
- unregister_netdevice(hsr_dev);
err_unregister:
hsr_del_ports(hsr);
err_add_master:
hsr_del_self_node(hsr);
+ if (unregister)
+ unregister_netdevice(hsr_dev);
return res;
}
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index a099d7de7e79..b8f9262ed101 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include "hsr_main.h"
+void hsr_del_ports(struct hsr_priv *hsr);
void hsr_dev_setup(struct net_device *dev);
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
unsigned char multicast_spec, u8 protocol_version,
@@ -18,5 +19,4 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr);
bool is_hsr_master(struct net_device *dev);
int hsr_get_max_mtu(struct hsr_priv *hsr);
-
#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index ed13760463de..1ea17752fffc 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -120,13 +120,18 @@ static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame,
return skb_clone(frame->skb_std, GFP_ATOMIC);
}
-static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame,
- struct hsr_port *port, u8 proto_version)
+static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
+ struct hsr_frame_info *frame,
+ struct hsr_port *port, u8 proto_version)
{
struct hsr_ethhdr *hsr_ethhdr;
int lane_id;
int lsdu_size;
+ /* pad to minimum packet size which is 60 + 6 (HSR tag) */
+ if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN))
+ return NULL;
+
if (port->type == HSR_PT_SLAVE_A)
lane_id = 0;
else
@@ -144,6 +149,8 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame,
hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
hsr_ethhdr->ethhdr.h_proto = htons(proto_version ?
ETH_P_HSR : ETH_P_PRP);
+
+ return skb;
}
static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o,
@@ -172,9 +179,10 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o,
memmove(dst, src, movelen);
skb_reset_mac_header(skb);
- hsr_fill_tag(skb, frame, port, port->hsr->prot_version);
-
- return skb;
+ /* skb_put_padto free skb on error and hsr_fill_tag returns NULL in
+ * that case
+ */
+ return hsr_fill_tag(skb, frame, port, port->hsr->prot_version);
}
/* If the original frame was an HSR tagged frame, just clone it to be sent
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 03b891904314..530de24b1fb5 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -325,7 +325,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
if (port->type != node_dst->addr_B_port)
return;
- ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B);
+ if (is_valid_ether_addr(node_dst->macaddress_B))
+ ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B);
}
void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index e2564de67603..144da15f0a81 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -6,6 +6,7 @@
*/
#include <linux/netdevice.h>
+#include <net/rtnetlink.h>
#include <linux/rculist.h>
#include <linux/timer.h>
#include <linux/etherdevice.h>
@@ -100,8 +101,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER);
hsr_del_port(port);
if (hsr_slave_empty(master->hsr)) {
- unregister_netdevice_queue(master->dev,
- &list_kill);
+ const struct rtnl_link_ops *ops;
+
+ ops = master->dev->rtnl_link_ops;
+ ops->dellink(master->dev, &list_kill);
unregister_netdevice_many(&list_kill);
}
}
@@ -144,9 +147,9 @@ static int __init hsr_init(void)
static void __exit hsr_exit(void)
{
- unregister_netdevice_notifier(&hsr_nb);
hsr_netlink_exit();
hsr_debugfs_remove_root();
+ unregister_netdevice_notifier(&hsr_nb);
}
module_init(hsr_init);
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 1decb25f6764..6e14b7d22639 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -83,6 +83,22 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack);
}
+static void hsr_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct hsr_priv *hsr = netdev_priv(dev);
+
+ del_timer_sync(&hsr->prune_timer);
+ del_timer_sync(&hsr->announce_timer);
+
+ hsr_debugfs_term(hsr);
+ hsr_del_ports(hsr);
+
+ hsr_del_self_node(hsr);
+ hsr_del_nodes(&hsr->node_db);
+
+ unregister_netdevice_queue(dev, head);
+}
+
static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct hsr_priv *hsr = netdev_priv(dev);
@@ -118,6 +134,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = {
.priv_size = sizeof(struct hsr_priv),
.setup = hsr_dev_setup,
.newlink = hsr_newlink,
+ .dellink = hsr_dellink,
.fill_info = hsr_fill_info,
};
diff --git a/net/ieee802154/6lowpan/Kconfig b/net/ieee802154/6lowpan/Kconfig
index d1b4655a6d43..e808e4db2678 100644
--- a/net/ieee802154/6lowpan/Kconfig
+++ b/net/ieee802154/6lowpan/Kconfig
@@ -2,5 +2,5 @@
config IEEE802154_6LOWPAN
tristate "6lowpan support over IEEE 802.15.4"
depends on 6LOWPAN
- ---help---
+ help
IPv6 compression over IEEE 802.15.4.
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 5dbbc2ca95b4..bcb05ba97686 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
menuconfig IEEE802154
tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support"
- ---help---
+ help
IEEE Std 802.15.4 defines a low data rate, low power and low
complexity short range wireless personal area networks. It was
designed to organise networks of sensors, switches, etc automation
@@ -15,13 +15,13 @@ if IEEE802154
config IEEE802154_NL802154_EXPERIMENTAL
bool "IEEE 802.15.4 experimental netlink support"
- ---help---
+ help
Adds experimental netlink support for nl802154.
config IEEE802154_SOCKET
tristate "IEEE 802.15.4 socket interface"
default y
- ---help---
+ help
Socket interface for IEEE 802.15.4. Contains DGRAM sockets interface
for 802.15.4 dataframes. Also RAW socket interface to build MAC
header from userspace.
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 23ba5045e3d3..e64e59b536d3 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -14,7 +14,7 @@ config IP_MULTICAST
config IP_ADVANCED_ROUTER
bool "IP: advanced router"
- ---help---
+ help
If you intend to run your Linux box mostly as a router, i.e. as a
computer that forwards and redistributes network packets, say Y; you
will then be presented with several options that allow more precise
@@ -56,7 +56,7 @@ config IP_ADVANCED_ROUTER
config IP_FIB_TRIE_STATS
bool "FIB TRIE statistics"
depends on IP_ADVANCED_ROUTER
- ---help---
+ help
Keep track of statistics on structure of FIB TRIE table.
Useful for testing and measuring TRIE performance.
@@ -64,7 +64,7 @@ config IP_MULTIPLE_TABLES
bool "IP: policy routing"
depends on IP_ADVANCED_ROUTER
select FIB_RULES
- ---help---
+ help
Normally, a router decides what to do with a received packet based
solely on the packet's final destination address. If you say Y here,
the Linux router will also be able to take the packet's source
@@ -117,7 +117,7 @@ config IP_PNP
config IP_PNP_DHCP
bool "IP: DHCP support"
depends on IP_PNP
- ---help---
+ help
If you want your Linux box to mount its whole root file system (the
one containing the directory /) from some other computer over the
net via NFS and you want the IP address of your computer to be
@@ -134,7 +134,7 @@ config IP_PNP_DHCP
config IP_PNP_BOOTP
bool "IP: BOOTP support"
depends on IP_PNP
- ---help---
+ help
If you want your Linux box to mount its whole root file system (the
one containing the directory /) from some other computer over the
net via NFS and you want the IP address of your computer to be
@@ -163,7 +163,7 @@ config NET_IPIP
tristate "IP: tunneling"
select INET_TUNNEL
select NET_IP_TUNNEL
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This particular tunneling driver implements
@@ -267,7 +267,7 @@ config IP_PIMSM_V2
config SYN_COOKIES
bool "IP: TCP syncookie support"
- ---help---
+ help
Normal TCP/IP networking is open to an attack known as "SYN
flooding". This denial-of-service attack prevents legitimate remote
users from being able to connect to your computer during an ongoing
@@ -307,7 +307,7 @@ config NET_IPVTI
select INET_TUNNEL
select NET_IP_TUNNEL
select XFRM
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This can be used with xfrm mode tunnel to give
@@ -323,7 +323,7 @@ config NET_FOU
tristate "IP: Foo (IP protocols) over UDP"
select XFRM
select NET_UDP_TUNNEL
- ---help---
+ help
Foo over UDP allows any IP protocol to be directly encapsulated
over UDP include tunnels (IPIP, GRE, SIT). By encapsulating in UDP
network mechanisms and optimizations for UDP (such as ECMP
@@ -333,36 +333,38 @@ config NET_FOU_IP_TUNNELS
bool "IP: FOU encapsulation of IP tunnels"
depends on NET_IPIP || NET_IPGRE || IPV6_SIT
select NET_FOU
- ---help---
+ help
Allow configuration of FOU or GUE encapsulation for IP tunnels.
When this option is enabled IP tunnels can be configured to use
FOU or GUE encapsulation.
config INET_AH
tristate "IP: AH transformation"
- select XFRM_ALGO
- select CRYPTO
- select CRYPTO_HMAC
- select CRYPTO_MD5
- select CRYPTO_SHA1
- ---help---
- Support for IPsec AH.
+ select XFRM_AH
+ help
+ Support for IPsec AH (Authentication Header).
+
+ AH can be used with various authentication algorithms. Besides
+ enabling AH support itself, this option enables the generic
+ implementations of the algorithms that RFC 8221 lists as MUST be
+ implemented. If you need any other algorithms, you'll need to enable
+ them in the crypto API. You should also enable accelerated
+ implementations of any needed algorithms when available.
If unsure, say Y.
config INET_ESP
tristate "IP: ESP transformation"
- select XFRM_ALGO
- select CRYPTO
- select CRYPTO_AUTHENC
- select CRYPTO_HMAC
- select CRYPTO_MD5
- select CRYPTO_CBC
- select CRYPTO_SHA1
- select CRYPTO_DES
- select CRYPTO_ECHAINIV
- ---help---
- Support for IPsec ESP.
+ select XFRM_ESP
+ help
+ Support for IPsec ESP (Encapsulating Security Payload).
+
+ ESP can be used with various encryption and authentication algorithms.
+ Besides enabling ESP support itself, this option enables the generic
+ implementations of the algorithms that RFC 8221 lists as MUST be
+ implemented. If you need any other algorithms, you'll need to enable
+ them in the crypto API. You should also enable accelerated
+ implementations of any needed algorithms when available.
If unsure, say Y.
@@ -371,7 +373,7 @@ config INET_ESP_OFFLOAD
depends on INET_ESP
select XFRM_OFFLOAD
default n
- ---help---
+ help
Support for ESP transformation offload. This makes sense
only if this system really does IPsec and want to do it
with high throughput. A typical desktop system does not
@@ -395,7 +397,7 @@ config INET_IPCOMP
tristate "IP: IPComp transformation"
select INET_XFRM_TUNNEL
select XFRM_IPCOMP
- ---help---
+ help
Support for IP Payload Compression Protocol (IPComp) (RFC3173),
typically needed for IPsec.
@@ -413,7 +415,7 @@ config INET_TUNNEL
config INET_DIAG
tristate "INET: socket monitoring interface"
default y
- ---help---
+ help
Support for INET (TCP, DCCP, etc) socket monitoring interface used by
native Linux tools such as ss. ss is included in iproute2, currently
downloadable at:
@@ -430,7 +432,7 @@ config INET_UDP_DIAG
tristate "UDP: socket monitoring interface"
depends on INET_DIAG && (IPV6 || IPV6=n)
default n
- ---help---
+ help
Support for UDP socket monitoring interface used by the ss tool.
If unsure, say Y.
@@ -438,7 +440,7 @@ config INET_RAW_DIAG
tristate "RAW: socket monitoring interface"
depends on INET_DIAG && (IPV6 || IPV6=n)
default n
- ---help---
+ help
Support for RAW socket monitoring interface used by the ss tool.
If unsure, say Y.
@@ -446,7 +448,7 @@ config INET_DIAG_DESTROY
bool "INET: allow privileged process to administratively close sockets"
depends on INET_DIAG
default n
- ---help---
+ help
Provides a SOCK_DESTROY operation that allows privileged processes
(e.g., a connection manager or a network administration tool such as
ss) to close sockets opened by other processes. Closing a socket in
@@ -457,7 +459,7 @@ config INET_DIAG_DESTROY
menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
- ---help---
+ help
Support for selection of various TCP congestion control
modules.
@@ -471,7 +473,7 @@ if TCP_CONG_ADVANCED
config TCP_CONG_BIC
tristate "Binary Increase Congestion (BIC) control"
default m
- ---help---
+ help
BIC-TCP is a sender-side only change that ensures a linear RTT
fairness under large windows while offering both scalability and
bounded TCP-friendliness. The protocol combines two schemes
@@ -485,7 +487,7 @@ config TCP_CONG_BIC
config TCP_CONG_CUBIC
tristate "CUBIC TCP"
default y
- ---help---
+ help
This is version 2.0 of BIC-TCP which uses a cubic growth function
among other techniques.
See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
@@ -493,7 +495,7 @@ config TCP_CONG_CUBIC
config TCP_CONG_WESTWOOD
tristate "TCP Westwood+"
default m
- ---help---
+ help
TCP Westwood+ is a sender-side only modification of the TCP Reno
protocol stack that optimizes the performance of TCP congestion
control. It is based on end-to-end bandwidth estimation to set
@@ -507,7 +509,7 @@ config TCP_CONG_WESTWOOD
config TCP_CONG_HTCP
tristate "H-TCP"
default m
- ---help---
+ help
H-TCP is a send-side only modifications of the TCP Reno
protocol stack that optimizes the performance of TCP
congestion control for high speed network links. It uses a
@@ -518,7 +520,7 @@ config TCP_CONG_HTCP
config TCP_CONG_HSTCP
tristate "High Speed TCP"
default n
- ---help---
+ help
Sally Floyd's High Speed TCP (RFC 3649) congestion control.
A modification to TCP's congestion control mechanism for use
with large congestion windows. A table indicates how much to
@@ -528,7 +530,7 @@ config TCP_CONG_HSTCP
config TCP_CONG_HYBLA
tristate "TCP-Hybla congestion control algorithm"
default n
- ---help---
+ help
TCP-Hybla is a sender-side only change that eliminates penalization of
long-RTT, large-bandwidth connections, like when satellite legs are
involved, especially when sharing a common bottleneck with normal
@@ -537,7 +539,7 @@ config TCP_CONG_HYBLA
config TCP_CONG_VEGAS
tristate "TCP Vegas"
default n
- ---help---
+ help
TCP Vegas is a sender-side only change to TCP that anticipates
the onset of congestion by estimating the bandwidth. TCP Vegas
adjusts the sending rate by modifying the congestion
@@ -547,7 +549,7 @@ config TCP_CONG_VEGAS
config TCP_CONG_NV
tristate "TCP NV"
default n
- ---help---
+ help
TCP NV is a follow up to TCP Vegas. It has been modified to deal with
10G networks, measurement noise introduced by LRO, GRO and interrupt
coalescence. In addition, it will decrease its cwnd multiplicatively
@@ -563,7 +565,7 @@ config TCP_CONG_NV
config TCP_CONG_SCALABLE
tristate "Scalable TCP"
default n
- ---help---
+ help
Scalable TCP is a sender-side only change to TCP which uses a
MIMD congestion control algorithm which has some nice scaling
properties, though is known to have fairness issues.
@@ -572,7 +574,7 @@ config TCP_CONG_SCALABLE
config TCP_CONG_LP
tristate "TCP Low Priority"
default n
- ---help---
+ help
TCP Low Priority (TCP-LP), a distributed algorithm whose goal is
to utilize only the excess network bandwidth as compared to the
``fair share`` of bandwidth as targeted by TCP.
@@ -581,7 +583,7 @@ config TCP_CONG_LP
config TCP_CONG_VENO
tristate "TCP Veno"
default n
- ---help---
+ help
TCP Veno is a sender-side only enhancement of TCP to obtain better
throughput over wireless networks. TCP Veno makes use of state
distinguishing to circumvent the difficult judgment of the packet loss
@@ -593,7 +595,7 @@ config TCP_CONG_YEAH
tristate "YeAH TCP"
select TCP_CONG_VEGAS
default n
- ---help---
+ help
YeAH-TCP is a sender-side high-speed enabled TCP congestion control
algorithm, which uses a mixed loss/delay approach to compute the
congestion window. It's design goals target high efficiency,
@@ -606,7 +608,7 @@ config TCP_CONG_YEAH
config TCP_CONG_ILLINOIS
tristate "TCP Illinois"
default n
- ---help---
+ help
TCP-Illinois is a sender-side modification of TCP Reno for
high speed long delay links. It uses round-trip-time to
adjust the alpha and beta parameters to achieve a higher average
@@ -618,7 +620,7 @@ config TCP_CONG_ILLINOIS
config TCP_CONG_DCTCP
tristate "DataCenter TCP (DCTCP)"
default n
- ---help---
+ help
DCTCP leverages Explicit Congestion Notification (ECN) in the network to
provide multi-bit feedback to the end hosts. It is designed to provide:
@@ -639,7 +641,7 @@ config TCP_CONG_DCTCP
config TCP_CONG_CDG
tristate "CAIA Delay-Gradient (CDG)"
default n
- ---help---
+ help
CAIA Delay-Gradient (CDG) is a TCP congestion control that modifies
the TCP sender in order to:
@@ -655,7 +657,7 @@ config TCP_CONG_CDG
config TCP_CONG_BBR
tristate "BBR TCP"
default n
- ---help---
+ help
BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to
maximize network utilization and minimize queues. It builds an explicit
@@ -734,7 +736,7 @@ config TCP_MD5SIG
bool "TCP: MD5 Signature Option support (RFC2385)"
select CRYPTO
select CRYPTO_MD5
- ---help---
+ help
RFC2385 specifies a method of giving MD5 protection to TCP sessions.
Its main (only?) use is to protect BGP sessions between core routers
on the Internet.
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 0480918bfc7c..9063c6767d34 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -12,15 +12,14 @@
struct bpfilter_umh_ops bpfilter_ops;
EXPORT_SYMBOL_GPL(bpfilter_ops);
-static void bpfilter_umh_cleanup(struct umh_info *info)
+void bpfilter_umh_cleanup(struct umd_info *info)
{
- mutex_lock(&bpfilter_ops.lock);
- bpfilter_ops.stop = true;
fput(info->pipe_to_umh);
fput(info->pipe_from_umh);
- info->pid = 0;
- mutex_unlock(&bpfilter_ops.lock);
+ put_pid(info->tgid);
+ info->tgid = NULL;
}
+EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup);
static int bpfilter_mbox_request(struct sock *sk, int optname,
char __user *optval,
@@ -38,7 +37,11 @@ static int bpfilter_mbox_request(struct sock *sk, int optname,
goto out;
}
}
- if (bpfilter_ops.stop) {
+ if (bpfilter_ops.info.tgid &&
+ thread_group_exited(bpfilter_ops.info.tgid))
+ bpfilter_umh_cleanup(&bpfilter_ops.info);
+
+ if (!bpfilter_ops.info.tgid) {
err = bpfilter_ops.start();
if (err)
goto out;
@@ -69,9 +72,8 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
static int __init bpfilter_sockopt_init(void)
{
mutex_init(&bpfilter_ops.lock);
- bpfilter_ops.stop = true;
- bpfilter_ops.info.cmdline = "bpfilter_umh";
- bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup;
+ bpfilter_ops.info.tgid = NULL;
+ bpfilter_ops.info.driver_name = "bpfilter_umh";
return 0;
}
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index d14133eac476..5bda5aeda579 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -361,3 +361,4 @@ module_exit(esp4_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP);
+MODULE_DESCRIPTION("IPV4 GSO/GRO offload support");
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e53871e4a097..1f75dc686b6b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1109,7 +1109,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
- if (table)
+ if (table && table != RT_TABLE_MAIN)
tbl = fib_get_table(net, table);
if (tbl)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 248f1c1959a6..3c65f71d0e82 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1864,7 +1864,7 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
struct key_vector *local_l = NULL, *local_tp;
- hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ hlist_for_each_entry(fa, &l->leaf, fa_list) {
struct fib_alias *new_fa;
if (local_tb->tb_id != fa->tb_id)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index dcc79ff54b41..abd083415f89 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1304,3 +1304,4 @@ module_init(fou_init);
module_exit(fou_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Foo over UDP");
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 956a806649f7..e30515f89802 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
ipcm_init(&ipc);
inet->tos = ip_hdr(skb)->tos;
- sk->sk_mark = mark;
+ ipc.sockc.mark = mark;
daddr = ipc.addr = ip_hdr(skb)->saddr;
saddr = fib_compute_spec_dst(skb);
@@ -710,10 +710,10 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
icmp_param.skb = skb_in;
icmp_param.offset = skb_network_offset(skb_in);
inet_sk(sk)->tos = tos;
- sk->sk_mark = mark;
ipcm_init(&ipc);
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts.opt;
+ ipc.sockc.mark = mark;
rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
type, code, &icmp_param);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 090d3097ee15..17206677d503 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1702,7 +1702,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
sk->sk_sndbuf = sysctl_wmem_default;
- sk->sk_mark = fl4.flowi4_mark;
+ ipc.sockc.mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
if (unlikely(err)) {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index f4f1d11eab50..0c1f36404471 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -85,9 +85,10 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 remote, __be32 local,
__be32 key)
{
- unsigned int hash;
struct ip_tunnel *t, *cand = NULL;
struct hlist_head *head;
+ struct net_device *ndev;
+ unsigned int hash;
hash = ip_tunnel_hash(key, remote);
head = &itn->tunnels[hash];
@@ -162,8 +163,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (t && t->dev->flags & IFF_UP)
return t;
- if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
- return netdev_priv(itn->fb_tunnel_dev);
+ ndev = READ_ONCE(itn->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
@@ -1259,9 +1261,9 @@ void ip_tunnel_uninit(struct net_device *dev)
struct ip_tunnel_net *itn;
itn = net_generic(net, tunnel->ip_tnl_net_id);
- /* fb_tunnel_dev will be unregisted in net-exit call. */
- if (itn->fb_tunnel_dev != dev)
- ip_tunnel_del(itn, netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
+ if (itn->fb_tunnel_dev == dev)
+ WRITE_ONCE(itn->fb_tunnel_dev, NULL);
dst_cache_reset(&tunnel->dst_cache);
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 181b7a2a0247..f8b419e2475c 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -844,3 +844,21 @@ void ip_tunnel_unneed_metadata(void)
static_branch_dec(&ip_tunnel_metadata_cnt);
}
EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);
+
+/* Returns either the correct skb->protocol value, or 0 if invalid. */
+__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb)
+{
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) &&
+ ip_hdr(skb)->version == 4)
+ return htons(ETH_P_IP);
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) &&
+ ipv6_hdr(skb)->version == 6)
+ return htons(ETH_P_IPV6);
+ return 0;
+}
+EXPORT_SYMBOL(ip_tunnel_parse_protocol);
+
+const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol };
+EXPORT_SYMBOL(ip_tunnel_header_ops);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1d9c8cff5ac3..460ca1099e8a 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -441,6 +441,7 @@ static const struct net_device_ops vti_netdev_ops = {
static void vti_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &vti_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_TUNNEL;
ip_tunnel_setup(dev, vti_net_id);
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 40fea52c8277..75d35e76bec2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -361,6 +361,7 @@ static const struct net_device_ops ipip_netdev_ops = {
static void ipip_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_TUNNEL;
dev->flags = IFF_NOARP;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f17b402111ce..a2f4f894be2b 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -94,7 +94,7 @@ config NF_NAT_SNMP_BASIC
depends on NETFILTER_ADVANCED
default NF_NAT && NF_CONNTRACK_SNMP
select ASN1
- ---help---
+ help
This module implements an Application Layer Gateway (ALG) for
SNMP payloads. In conjunction with NAT, it allows a network
@@ -146,7 +146,7 @@ config IP_NF_MATCH_ECN
tristate '"ecn" match support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MATCH_ECN
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MATCH_ECN.
@@ -155,7 +155,7 @@ config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
depends on IP_NF_MANGLE || IP_NF_RAW
- ---help---
+ help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -166,7 +166,7 @@ config IP_NF_MATCH_TTL
tristate '"ttl" match support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MATCH_HL
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MATCH_HL.
@@ -234,7 +234,7 @@ config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
depends on NETFILTER_ADVANCED
select NETFILTER_XT_TARGET_NETMAP
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_NETMAP.
@@ -243,7 +243,7 @@ config IP_NF_TARGET_REDIRECT
tristate "REDIRECT target support"
depends on NETFILTER_ADVANCED
select NETFILTER_XT_TARGET_REDIRECT
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_REDIRECT.
@@ -279,7 +279,7 @@ config IP_NF_TARGET_ECN
tristate "ECN target support"
depends on IP_NF_MANGLE
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds a `ECN' target, which can be used in the iptables mangle
table.
@@ -294,7 +294,7 @@ config IP_NF_TARGET_TTL
tristate '"TTL" target support'
depends on NETFILTER_ADVANCED && IP_NF_MANGLE
select NETFILTER_XT_TARGET_HL
- ---help---
+ help
This is a backwards-compatible option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_HL.
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c2670eaa74e6..5bf9fa06aee0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1797,11 +1797,22 @@ out_free:
return ret;
}
+void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops)
+{
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+}
+
+void ipt_unregister_table_exit(struct net *net, struct xt_table *table)
+{
+ __ipt_unregister_table(net, table);
+}
+
void ipt_unregister_table(struct net *net, struct xt_table *table,
const struct nf_hook_ops *ops)
{
if (ops)
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ ipt_unregister_table_pre_exit(net, table, ops);
__ipt_unregister_table(net, table);
}
@@ -1958,6 +1969,8 @@ static void __exit ip_tables_fini(void)
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_unregister_table_pre_exit);
+EXPORT_SYMBOL(ipt_unregister_table_exit);
EXPORT_SYMBOL(ipt_do_table);
module_init(ip_tables_init);
module_exit(ip_tables_fini);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 748dc3ce58d3..f2984c7eef40 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -118,3 +118,4 @@ module_exit(synproxy_tg4_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Intercept TCP connections and establish them using syncookies");
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 9d54b4017e50..8f7bc1ee7453 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -72,16 +72,24 @@ static int __net_init iptable_filter_net_init(struct net *net)
return 0;
}
+static void __net_exit iptable_filter_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.iptable_filter)
+ ipt_unregister_table_pre_exit(net, net->ipv4.iptable_filter,
+ filter_ops);
+}
+
static void __net_exit iptable_filter_net_exit(struct net *net)
{
if (!net->ipv4.iptable_filter)
return;
- ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
+ ipt_unregister_table_exit(net, net->ipv4.iptable_filter);
net->ipv4.iptable_filter = NULL;
}
static struct pernet_operations iptable_filter_net_ops = {
.init = iptable_filter_net_init,
+ .pre_exit = iptable_filter_net_pre_exit,
.exit = iptable_filter_net_exit,
};
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index bb9266ea3785..f703a717ab1d 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -100,15 +100,23 @@ static int __net_init iptable_mangle_table_init(struct net *net)
return ret;
}
+static void __net_exit iptable_mangle_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.iptable_mangle)
+ ipt_unregister_table_pre_exit(net, net->ipv4.iptable_mangle,
+ mangle_ops);
+}
+
static void __net_exit iptable_mangle_net_exit(struct net *net)
{
if (!net->ipv4.iptable_mangle)
return;
- ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops);
+ ipt_unregister_table_exit(net, net->ipv4.iptable_mangle);
net->ipv4.iptable_mangle = NULL;
}
static struct pernet_operations iptable_mangle_net_ops = {
+ .pre_exit = iptable_mangle_net_pre_exit,
.exit = iptable_mangle_net_exit,
};
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ad33687b7444..b0143b109f25 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -113,16 +113,22 @@ static int __net_init iptable_nat_table_init(struct net *net)
return ret;
}
+static void __net_exit iptable_nat_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.nat_table)
+ ipt_nat_unregister_lookups(net);
+}
+
static void __net_exit iptable_nat_net_exit(struct net *net)
{
if (!net->ipv4.nat_table)
return;
- ipt_nat_unregister_lookups(net);
- ipt_unregister_table(net, net->ipv4.nat_table, NULL);
+ ipt_unregister_table_exit(net, net->ipv4.nat_table);
net->ipv4.nat_table = NULL;
}
static struct pernet_operations iptable_nat_net_ops = {
+ .pre_exit = iptable_nat_net_pre_exit,
.exit = iptable_nat_net_exit,
};
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 69697eb4bfc6..9abfe6bf2cb9 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -67,15 +67,23 @@ static int __net_init iptable_raw_table_init(struct net *net)
return ret;
}
+static void __net_exit iptable_raw_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.iptable_raw)
+ ipt_unregister_table_pre_exit(net, net->ipv4.iptable_raw,
+ rawtable_ops);
+}
+
static void __net_exit iptable_raw_net_exit(struct net *net)
{
if (!net->ipv4.iptable_raw)
return;
- ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops);
+ ipt_unregister_table_exit(net, net->ipv4.iptable_raw);
net->ipv4.iptable_raw = NULL;
}
static struct pernet_operations iptable_raw_net_ops = {
+ .pre_exit = iptable_raw_net_pre_exit,
.exit = iptable_raw_net_exit,
};
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index ac633c1db97e..415c1975d770 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -62,16 +62,23 @@ static int __net_init iptable_security_table_init(struct net *net)
return ret;
}
+static void __net_exit iptable_security_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.iptable_security)
+ ipt_unregister_table_pre_exit(net, net->ipv4.iptable_security,
+ sectbl_ops);
+}
+
static void __net_exit iptable_security_net_exit(struct net *net)
{
if (!net->ipv4.iptable_security)
return;
-
- ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops);
+ ipt_unregister_table_exit(net, net->ipv4.iptable_security);
net->ipv4.iptable_security = NULL;
}
static struct pernet_operations iptable_security_net_ops = {
+ .pre_exit = iptable_security_net_pre_exit,
.exit = iptable_security_net_exit,
};
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index e32e41b99f0f..aba65fe90345 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -34,3 +34,4 @@ module_exit(nf_flow_ipv4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
+MODULE_DESCRIPTION("Netfilter flow table support");
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index c94445b44d8c..2d42e4c35a20 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -84,11 +84,11 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
const struct net_device *indev)
{
- __be32 uninitialized_var(daddr), uninitialized_var(saddr);
- __be16 uninitialized_var(dport), uninitialized_var(sport);
+ __be32 daddr, saddr;
+ __be16 dport, sport;
const struct iphdr *iph = ip_hdr(skb);
struct sk_buff *data_skb = NULL;
- u8 uninitialized_var(protocol);
+ u8 protocol;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
enum ip_conntrack_info ctinfo;
struct nf_conn const *ct;
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index abf89b972094..bcdb37f86a94 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -107,3 +107,4 @@ module_exit(nft_dup_ipv4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup");
+MODULE_DESCRIPTION("IPv4 nftables packet duplication support");
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index ce294113dbcd..03df986217b7 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -210,3 +210,4 @@ module_exit(nft_fib4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_ALIAS_NFT_AF_EXPR(2, "fib");
+MODULE_DESCRIPTION("nftables fib / ip route lookup support");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index 7e6fd5cde50f..e408f813f5d8 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -71,3 +71,4 @@ module_exit(nft_reject_ipv4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
+MODULE_DESCRIPTION("IPv4 packet rejection for nftables");
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 400a9f89ebdb..cc8049b100b2 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -247,12 +247,11 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_u32(skb, NHA_ID, nh->id))
goto nla_put_failure;
- if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB))
- goto nla_put_failure;
-
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
+ goto nla_put_failure;
if (nla_put_nh_group(skb, nhg))
goto nla_put_failure;
goto out;
@@ -264,7 +263,10 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_flag(skb, NHA_BLACKHOLE))
goto nla_put_failure;
goto out;
- } else if (!nh->is_fdb_nh) {
+ } else if (nhi->fdb_nh) {
+ if (nla_put_flag(skb, NHA_FDB))
+ goto nla_put_failure;
+ } else {
const struct net_device *dev;
dev = nhi->fib_nhc.nhc_dev;
@@ -385,7 +387,7 @@ errout:
}
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
- struct netlink_ext_ack *extack)
+ bool *is_fdb, struct netlink_ext_ack *extack)
{
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
@@ -398,6 +400,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
"Multipath group can not be a nexthop within a group");
return false;
}
+ *is_fdb = nhg->fdb_nh;
} else {
struct nh_info *nhi = rtnl_dereference(nh->nh_info);
@@ -406,6 +409,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
"Blackhole nexthop can not be used in a group with more than 1 path");
return false;
}
+ *is_fdb = nhi->fdb_nh;
}
return true;
@@ -416,12 +420,13 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
{
struct nh_info *nhi;
- if (!nh->is_fdb_nh) {
+ nhi = rtnl_dereference(nh->nh_info);
+
+ if (!nhi->fdb_nh) {
NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
return -EINVAL;
}
- nhi = rtnl_dereference(nh->nh_info);
if (*nh_family == AF_UNSPEC) {
*nh_family = nhi->family;
} else if (*nh_family != nhi->family) {
@@ -473,19 +478,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
struct nexthop *nh;
+ bool is_fdb_nh;
nh = nexthop_find_by_id(net, nhg[i].id);
if (!nh) {
NL_SET_ERR_MSG(extack, "Invalid nexthop id");
return -EINVAL;
}
- if (!valid_group_nh(nh, len, extack))
+ if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
return -EINVAL;
if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
return -EINVAL;
- if (!nhg_fdb && nh->is_fdb_nh) {
+ if (!nhg_fdb && is_fdb_nh) {
NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
return -EINVAL;
}
@@ -553,13 +559,13 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
if (hash > atomic_read(&nhge->upper_bound))
continue;
- if (nhge->nh->is_fdb_nh)
+ nhi = rcu_dereference(nhge->nh->nh_info);
+ if (nhi->fdb_nh)
return nhge->nh;
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
- nhi = rcu_dereference(nhge->nh->nh_info);
switch (nhi->family) {
case AF_INET:
if (ipv4_good_nh(&nhi->fib_nh))
@@ -624,11 +630,7 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct nh_info *nhi;
-
- if (nh->is_fdb_nh) {
- NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
- return -EINVAL;
- }
+ bool is_fdb_nh;
/* fib6_src is unique to a fib6_info and limits the ability to cache
* routes in fib6_nh within a nexthop that is potentially shared
@@ -645,10 +647,17 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
nhg = rtnl_dereference(nh->nh_grp);
if (nhg->has_v4)
goto no_v4_nh;
+ is_fdb_nh = nhg->fdb_nh;
} else {
nhi = rtnl_dereference(nh->nh_info);
if (nhi->family == AF_INET)
goto no_v4_nh;
+ is_fdb_nh = nhi->fdb_nh;
+ }
+
+ if (is_fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ return -EINVAL;
}
return 0;
@@ -677,12 +686,9 @@ static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
return fib6_check_nexthop(new, NULL, extack);
}
-static int nexthop_check_scope(struct nexthop *nh, u8 scope,
+static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
struct netlink_ext_ack *extack)
{
- struct nh_info *nhi;
-
- nhi = rtnl_dereference(nh->nh_info);
if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
NL_SET_ERR_MSG(extack,
"Route with host scope can not have a gateway");
@@ -704,29 +710,38 @@ static int nexthop_check_scope(struct nexthop *nh, u8 scope,
int fib_check_nexthop(struct nexthop *nh, u8 scope,
struct netlink_ext_ack *extack)
{
+ struct nh_info *nhi;
int err = 0;
- if (nh->is_fdb_nh) {
- NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
- err = -EINVAL;
- goto out;
- }
-
if (nh->is_group) {
struct nh_group *nhg;
+ nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ err = -EINVAL;
+ goto out;
+ }
+
if (scope == RT_SCOPE_HOST) {
NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
err = -EINVAL;
goto out;
}
- nhg = rtnl_dereference(nh->nh_grp);
/* all nexthops in a group have the same scope */
- err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack);
+ nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
+ err = nexthop_check_scope(nhi, scope, extack);
} else {
- err = nexthop_check_scope(nh, scope, extack);
+ nhi = rtnl_dereference(nh->nh_info);
+ if (nhi->fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ err = -EINVAL;
+ goto out;
+ }
+ err = nexthop_check_scope(nhi, scope, extack);
}
+
out:
return err;
}
@@ -787,6 +802,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
newg->has_v4 = nhg->has_v4;
newg->mpath = nhg->mpath;
+ newg->fdb_nh = nhg->fdb_nh;
newg->num_nh = nhg->num_nh;
/* copy old entries to new except the one getting removed */
@@ -1216,7 +1232,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
}
if (cfg->nh_fdb)
- nh->is_fdb_nh = 1;
+ nhg->fdb_nh = 1;
rcu_assign_pointer(nh->nh_grp, nhg);
@@ -1255,7 +1271,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
goto out;
}
- if (nh->is_fdb_nh)
+ if (nhi->fdb_nh)
goto out;
/* sets nh_dev if successful */
@@ -1326,7 +1342,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
if (cfg->nh_fdb)
- nh->is_fdb_nh = 1;
+ nhi->fdb_nh = 1;
if (cfg->nh_blackhole) {
nhi->reject_nh = 1;
@@ -1349,7 +1365,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
}
/* add the entry to the device based hash */
- if (!nh->is_fdb_nh)
+ if (!nhi->fdb_nh)
nexthop_devhash_add(net, nhi);
rcu_assign_pointer(nh->nh_info, nhi);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 535427292194..df6fbefe44d4 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -786,6 +786,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
sk->sk_uid);
+ fl4.fl4_icmp_type = user_icmph.type;
+ fl4.fl4_icmp_code = user_icmph.code;
+
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1d7076b78e63..a01efa062f6b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2027,7 +2027,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
const struct sk_buff *hint)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
- struct rtable *rt = (struct rtable *)hint;
+ struct rtable *rt = skb_rtable(hint);
struct net *net = dev_net(dev);
int err = -EINVAL;
u32 tag = 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 27716e4932bc..6f0caf9a866d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1742,14 +1742,48 @@ int tcp_mmap(struct file *file, struct socket *sock,
}
EXPORT_SYMBOL(tcp_mmap);
+static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
+ struct page **pages,
+ unsigned long pages_to_map,
+ unsigned long *insert_addr,
+ u32 *length_with_pending,
+ u32 *seq,
+ struct tcp_zerocopy_receive *zc)
+{
+ unsigned long pages_remaining = pages_to_map;
+ int bytes_mapped;
+ int ret;
+
+ ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining);
+ bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);
+ /* Even if vm_insert_pages fails, it may have partially succeeded in
+ * mapping (some but not all of the pages).
+ */
+ *seq += bytes_mapped;
+ *insert_addr += bytes_mapped;
+ if (ret) {
+ /* But if vm_insert_pages did fail, we have to unroll some state
+ * we speculatively touched before.
+ */
+ const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
+ *length_with_pending -= bytes_not_mapped;
+ zc->recv_skip_hint += bytes_not_mapped;
+ }
+ return ret;
+}
+
static int tcp_zerocopy_receive(struct sock *sk,
struct tcp_zerocopy_receive *zc)
{
unsigned long address = (unsigned long)zc->address;
u32 length = 0, seq, offset, zap_len;
+ #define PAGE_BATCH_SIZE 8
+ struct page *pages[PAGE_BATCH_SIZE];
const skb_frag_t *frags = NULL;
struct vm_area_struct *vma;
struct sk_buff *skb = NULL;
+ unsigned long pg_idx = 0;
+ unsigned long curr_addr;
struct tcp_sock *tp;
int inq;
int ret;
@@ -1762,6 +1796,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
sock_rps_record_flow(sk);
+ tp = tcp_sk(sk);
+
mmap_read_lock(current->mm);
vma = find_vma(current->mm, address);
@@ -1771,7 +1807,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
}
zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
- tp = tcp_sk(sk);
seq = tp->copied_seq;
inq = tcp_inq(sk);
zc->length = min_t(u32, zc->length, inq);
@@ -1783,8 +1818,20 @@ static int tcp_zerocopy_receive(struct sock *sk,
zc->recv_skip_hint = zc->length;
}
ret = 0;
+ curr_addr = address;
while (length + PAGE_SIZE <= zc->length) {
if (zc->recv_skip_hint < PAGE_SIZE) {
+ /* If we're here, finish the current batch. */
+ if (pg_idx) {
+ ret = tcp_zerocopy_vm_insert_batch(vma, pages,
+ pg_idx,
+ &curr_addr,
+ &length,
+ &seq, zc);
+ if (ret)
+ goto out;
+ pg_idx = 0;
+ }
if (skb) {
if (zc->recv_skip_hint > 0)
break;
@@ -1793,7 +1840,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
} else {
skb = tcp_recv_skb(sk, seq, &offset);
}
-
zc->recv_skip_hint = skb->len - offset;
offset -= skb_headlen(skb);
if ((int)offset < 0 || skb_has_frag_list(skb))
@@ -1817,14 +1863,24 @@ static int tcp_zerocopy_receive(struct sock *sk,
zc->recv_skip_hint -= remaining;
break;
}
- ret = vm_insert_page(vma, address + length,
- skb_frag_page(frags));
- if (ret)
- break;
+ pages[pg_idx] = skb_frag_page(frags);
+ pg_idx++;
length += PAGE_SIZE;
- seq += PAGE_SIZE;
zc->recv_skip_hint -= PAGE_SIZE;
frags++;
+ if (pg_idx == PAGE_BATCH_SIZE) {
+ ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+ &curr_addr, &length,
+ &seq, zc);
+ if (ret)
+ goto out;
+ pg_idx = 0;
+ }
+ }
+ if (pg_idx) {
+ ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+ &curr_addr, &length, &seq,
+ zc);
}
out:
mmap_read_unlock(current->mm);
@@ -2635,6 +2691,9 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->window_clamp = 0;
tp->delivered = 0;
tp->delivered_ce = 0;
+ if (icsk->icsk_ca_ops->release)
+ icsk->icsk_ca_ops->release(sk);
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
@@ -3190,10 +3249,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
#ifdef CONFIG_TCP_MD5SIG
case TCP_MD5SIG:
case TCP_MD5SIG_EXT:
- if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
- err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
- else
- err = -EINVAL;
+ err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
break;
#endif
case TCP_USER_TIMEOUT:
@@ -3977,11 +4033,14 @@ EXPORT_SYMBOL(tcp_md5_hash_skb_data);
int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
{
+ u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */
struct scatterlist sg;
- sg_init_one(&sg, key->key, key->keylen);
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
- return crypto_ahash_update(hp->md5_req);
+ sg_init_one(&sg, key->key, keylen);
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen);
+
+ /* We use data_race() because tcp_md5_do_add() might change key->key under us */
+ return data_race(crypto_ahash_update(hp->md5_req));
}
EXPORT_SYMBOL(tcp_md5_hash_key);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 629aaa9a1eb9..7aa68f4aae6c 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -64,6 +64,9 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
} while (i != msg_rx->sg.end);
if (unlikely(peek)) {
+ if (msg_rx == list_last_entry(&psock->ingress_msg,
+ struct sk_msg, list))
+ break;
msg_rx = list_next_entry(msg_rx, list);
continue;
}
@@ -242,6 +245,9 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
DEFINE_WAIT_FUNC(wait, woken_wake_function);
int ret = 0;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
if (!timeo)
return ret;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 3172e31987be..62878cf26d9c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -197,7 +197,7 @@ static void tcp_reinit_congestion_control(struct sock *sk,
icsk->icsk_ca_setsockopt = 1;
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
- if (sk->sk_state != TCP_CLOSE)
+ if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
tcp_init_congestion_control(sk);
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 8f8eefd3a3ce..c7bf5b26bf0c 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -432,10 +432,9 @@ static void hystart_update(struct sock *sk, u32 delay)
if (hystart_detect & HYSTART_DELAY) {
/* obtain the minimum delay of more than sampling packets */
+ if (ca->curr_rtt > delay)
+ ca->curr_rtt = delay;
if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
- if (ca->curr_rtt > delay)
- ca->curr_rtt = delay;
-
ca->sample_cnt++;
} else {
if (ca->curr_rtt > ca->delay_min +
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 83330a6cb242..518f04355fbf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -261,7 +261,8 @@ static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
* cwnd may be very low (even just 1 packet), so we should ACK
* immediately.
*/
- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
}
}
@@ -3487,10 +3488,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
}
}
-/* This routine deals with acks during a TLP episode.
- * We mark the end of a TLP episode on receiving TLP dupack or when
- * ack is after tlp_high_seq.
- * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
+/* This routine deals with acks during a TLP episode and ends an episode by
+ * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
*/
static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
{
@@ -3499,7 +3498,10 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
if (before(ack, tp->tlp_high_seq))
return;
- if (flag & FLAG_DSACKING_ACK) {
+ if (!tp->tlp_retrans) {
+ /* TLP of new data has been acknowledged */
+ tp->tlp_high_seq = 0;
+ } else if (flag & FLAG_DSACKING_ACK) {
/* This DSACK means original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
} else if (after(ack, tp->tlp_high_seq)) {
@@ -3665,6 +3667,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_in_ack_event(sk, ack_ev_flags);
}
+ /* This is a deviation from RFC3168 since it states that:
+ * "When the TCP data sender is ready to set the CWR bit after reducing
+ * the congestion window, it SHOULD set the CWR bit only on the first
+ * new data packet that it transmits."
+ * We accept CWR on pure ACKs to be more robust
+ * with widely-deployed TCP implementations that do this.
+ */
+ tcp_ecn_accept_cwr(sk, skb);
+
/* We passed data and got it acked, remove any soft error
* log. Something worked...
*/
@@ -4572,6 +4583,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
+ sk->sk_data_ready(sk);
tcp_drop(sk, skb);
return;
}
@@ -4605,7 +4617,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
skb, &fragstolen)) {
coalesce_done:
- tcp_grow_window(sk, skb);
+ /* For non sack flows, do not grow window to force DUPACK
+ * and trigger fast retransmit.
+ */
+ if (tcp_is_sack(tp))
+ tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4689,7 +4705,11 @@ add_sack:
tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
if (skb) {
- tcp_grow_window(sk, skb);
+ /* For non sack flows, do not grow window to force DUPACK
+ * and trigger fast retransmit.
+ */
+ if (tcp_is_sack(tp))
+ tcp_grow_window(sk, skb);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
@@ -4792,8 +4812,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
skb_dst_drop(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
- tcp_ecn_accept_cwr(sk, skb);
-
tp->rx_opt.dsack = 0;
/* Queue data for delivery to the user.
@@ -4812,6 +4830,7 @@ queue_and_out:
sk_forced_mem_schedule(sk, skb->truesize);
else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
+ sk->sk_data_ready(sk);
goto drop;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad6435ba6d72..04bfcbbfee83 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1111,9 +1111,21 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
if (key) {
- /* Pre-existing entry - just update that one. */
- memcpy(key->key, newkey, newkeylen);
- key->keylen = newkeylen;
+ /* Pre-existing entry - just update that one.
+ * Note that the key might be used concurrently.
+ * data_race() is telling kcsan that we do not care of
+ * key mismatches, since changing MD5 key on live flows
+ * can lead to packet drops.
+ */
+ data_race(memcpy(key->key, newkey, newkeylen));
+
+ /* Pairs with READ_ONCE() in tcp_md5_hash_key().
+ * Also note that a reader could catch new key->keylen value
+ * but old key->key[], this is the reason we use __GFP_ZERO
+ * at sock_kmalloc() time below these lines.
+ */
+ WRITE_ONCE(key->keylen, newkeylen);
+
return 0;
}
@@ -1129,7 +1141,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
rcu_assign_pointer(tp->md5sig_info, md5sig);
}
- key = sock_kmalloc(sk, sizeof(*key), gfp);
+ key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
if (!key)
return -ENOMEM;
if (!tcp_alloc_md5sig_pool()) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a50e1990a845..0bc05d68cd74 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -700,7 +700,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
const struct tcp_md5sig_key *md5,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -715,7 +716,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
* rather than TS in order to fit in better with old,
* buggy kernels, but that was deemed to be unnecessary.
*/
- ireq->tstamp_ok &= !ireq->sack_ok;
+ if (synack_type != TCP_SYNACK_COOKIE)
+ ireq->tstamp_ok &= !ireq->sack_ok;
}
#endif
@@ -2622,6 +2624,11 @@ void tcp_send_loss_probe(struct sock *sk)
int pcount;
int mss = tcp_current_mss(sk);
+ /* At most one outstanding TLP */
+ if (tp->tlp_high_seq)
+ goto rearm_timer;
+
+ tp->tlp_retrans = 0;
skb = tcp_send_head(sk);
if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
pcount = tp->packets_out;
@@ -2639,10 +2646,6 @@ void tcp_send_loss_probe(struct sock *sk)
return;
}
- /* At most one outstanding TLP retransmission. */
- if (tp->tlp_high_seq)
- goto rearm_timer;
-
if (skb_still_in_host_queue(sk, skb))
goto rearm_timer;
@@ -2664,10 +2667,12 @@ void tcp_send_loss_probe(struct sock *sk)
if (__tcp_retransmit_skb(sk, skb, 1))
goto rearm_timer;
+ tp->tlp_retrans = 1;
+
+probe_sent:
/* Record snd_nxt for loss detection. */
tp->tlp_high_seq = tp->snd_nxt;
-probe_sent:
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
/* Reset s.t. tcp_rearm_rto will restart timer from now */
inet_csk(sk)->icsk_pending = 0;
@@ -3394,7 +3399,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
- foc) + sizeof(*th);
+ foc, synack_type) + sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b7ebbcae497..4077d589b72e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -416,7 +416,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
struct udp_hslot *hslot2,
struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *sk, *result, *reuseport_result;
int score, badness;
u32 hash = 0;
@@ -426,17 +426,20 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
+ reuseport_result = NULL;
+
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
+ reuseport_result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (reuseport_result && !reuseport_has_conns(sk, false))
+ return reuseport_result;
}
+
+ result = reuseport_result ? : sk;
badness = score;
- result = sk;
}
}
return result;
@@ -2051,7 +2054,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets
*/
- if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
/*
* MIB statistics other than incrementing the error count are
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 4f03aece2980..f4f19e89af5e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -7,7 +7,7 @@
menuconfig IPV6
tristate "The IPv6 protocol"
default y
- ---help---
+ help
Support for IP version 6 (IPv6).
For general information about IPv6, see
@@ -23,7 +23,7 @@ if IPV6
config IPV6_ROUTER_PREF
bool "IPv6: Router Preference (RFC 4191) support"
- ---help---
+ help
Router Preference is an optional extension to the Router
Advertisement message which improves the ability of hosts
to pick an appropriate router, especially when the hosts
@@ -34,14 +34,14 @@ config IPV6_ROUTER_PREF
config IPV6_ROUTE_INFO
bool "IPv6: Route Information (RFC 4191) support"
depends on IPV6_ROUTER_PREF
- ---help---
+ help
Support of Route Information.
If unsure, say N.
config IPV6_OPTIMISTIC_DAD
bool "IPv6: Enable RFC 4429 Optimistic DAD"
- ---help---
+ help
Support for optimistic Duplicate Address Detection. It allows for
autoconfigured addresses to be used more quickly.
@@ -49,29 +49,31 @@ config IPV6_OPTIMISTIC_DAD
config INET6_AH
tristate "IPv6: AH transformation"
- select XFRM_ALGO
- select CRYPTO
- select CRYPTO_HMAC
- select CRYPTO_MD5
- select CRYPTO_SHA1
- ---help---
- Support for IPsec AH.
+ select XFRM_AH
+ help
+ Support for IPsec AH (Authentication Header).
+
+ AH can be used with various authentication algorithms. Besides
+ enabling AH support itself, this option enables the generic
+ implementations of the algorithms that RFC 8221 lists as MUST be
+ implemented. If you need any other algorithms, you'll need to enable
+ them in the crypto API. You should also enable accelerated
+ implementations of any needed algorithms when available.
If unsure, say Y.
config INET6_ESP
tristate "IPv6: ESP transformation"
- select XFRM_ALGO
- select CRYPTO
- select CRYPTO_AUTHENC
- select CRYPTO_HMAC
- select CRYPTO_MD5
- select CRYPTO_CBC
- select CRYPTO_SHA1
- select CRYPTO_DES
- select CRYPTO_ECHAINIV
- ---help---
- Support for IPsec ESP.
+ select XFRM_ESP
+ help
+ Support for IPsec ESP (Encapsulating Security Payload).
+
+ ESP can be used with various encryption and authentication algorithms.
+ Besides enabling ESP support itself, this option enables the generic
+ implementations of the algorithms that RFC 8221 lists as MUST be
+ implemented. If you need any other algorithms, you'll need to enable
+ them in the crypto API. You should also enable accelerated
+ implementations of any needed algorithms when available.
If unsure, say Y.
@@ -80,7 +82,7 @@ config INET6_ESP_OFFLOAD
depends on INET6_ESP
select XFRM_OFFLOAD
default n
- ---help---
+ help
Support for ESP transformation offload. This makes sense
only if this system really does IPsec and want to do it
with high throughput. A typical desktop system does not
@@ -104,7 +106,7 @@ config INET6_IPCOMP
tristate "IPv6: IPComp transformation"
select INET6_XFRM_TUNNEL
select XFRM_IPCOMP
- ---help---
+ help
Support for IP Payload Compression Protocol (IPComp) (RFC3173),
typically needed for IPsec.
@@ -113,7 +115,7 @@ config INET6_IPCOMP
config IPV6_MIP6
tristate "IPv6: Mobility"
select XFRM
- ---help---
+ help
Support for IPv6 Mobility described in RFC 3775.
If unsure, say N.
@@ -123,7 +125,7 @@ config IPV6_ILA
depends on NETFILTER
select DST_CACHE
select LWTUNNEL
- ---help---
+ help
Support for IPv6 Identifier Locator Addressing (ILA).
ILA is a mechanism to do network virtualization without
@@ -153,7 +155,7 @@ tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
select NET_IP_TUNNEL
select XFRM
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This can be used with xfrm mode tunnel to give
@@ -166,7 +168,7 @@ config IPV6_SIT
select NET_IP_TUNNEL
select IPV6_NDISC_NODETYPE
default y
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This driver implements encapsulation of IPv6
@@ -179,7 +181,7 @@ config IPV6_SIT_6RD
bool "IPv6: IPv6 Rapid Deployment (6RD)"
depends on IPV6_SIT
default n
- ---help---
+ help
IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
deploy IPv6 unicast service to IPv4 sites to which it provides
@@ -202,7 +204,7 @@ config IPV6_TUNNEL
select INET6_TUNNEL
select DST_CACHE
select GRO_CELLS
- ---help---
+ help
Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
RFC 2473.
@@ -213,7 +215,7 @@ config IPV6_GRE
select IPV6_TUNNEL
select NET_IP_TUNNEL
depends on NET_IPGRE_DEMUX
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This particular tunneling driver implements
@@ -238,13 +240,13 @@ config IPV6_FOU_TUNNEL
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
select FIB_RULES
- ---help---
+ help
Support multiple routing tables.
config IPV6_SUBTREES
bool "IPv6: source address based routing"
depends on IPV6_MULTIPLE_TABLES
- ---help---
+ help
Enable routing by source address or prefix.
The destination address is still the primary routing key, so mixing
@@ -259,7 +261,7 @@ config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
select IP_MROUTE_COMMON
- ---help---
+ help
Support for IPv6 multicast forwarding.
If unsure, say N.
@@ -280,7 +282,7 @@ config IPV6_MROUTE_MULTIPLE_TABLES
config IPV6_PIMSM_V2
bool "IPv6: PIM-SM version 2 support"
depends on IPV6_MROUTE
- ---help---
+ help
Support for IPv6 PIM multicast routing protocol PIM-SMv2.
If unsure, say N.
@@ -290,7 +292,7 @@ config IPV6_SEG6_LWTUNNEL
select LWTUNNEL
select DST_CACHE
select IPV6_MULTIPLE_TABLES
- ---help---
+ help
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
tunnels mechanism. Also enable support for advanced local
@@ -304,7 +306,7 @@ config IPV6_SEG6_HMAC
select CRYPTO_HMAC
select CRYPTO_SHA1
select CRYPTO_SHA256
- ---help---
+ help
Support for HMAC signature generation and verification
of SR-enabled packets.
@@ -319,7 +321,7 @@ config IPV6_RPL_LWTUNNEL
bool "IPv6: RPL Source Routing Header support"
depends on IPV6
select LWTUNNEL
- ---help---
+ help
Support for RFC6554 RPL Source Routing Header using the lightweight
tunnels mechanism.
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 893261230ffc..dacdea7fcb62 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -183,7 +183,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
return 0;
}
-void ipv6_sock_ac_close(struct sock *sk)
+void __ipv6_sock_ac_close(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct net_device *dev = NULL;
@@ -191,10 +191,7 @@ void ipv6_sock_ac_close(struct sock *sk)
struct net *net = sock_net(sk);
int prev_index;
- if (!np->ipv6_ac_list)
- return;
-
- rtnl_lock();
+ ASSERT_RTNL();
pac = np->ipv6_ac_list;
np->ipv6_ac_list = NULL;
@@ -211,6 +208,16 @@ void ipv6_sock_ac_close(struct sock *sk)
sock_kfree_s(sk, pac, sizeof(*pac));
pac = next;
}
+}
+
+void ipv6_sock_ac_close(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (!np->ipv6_ac_list)
+ return;
+ rtnl_lock();
+ __ipv6_sock_ac_close(sk);
rtnl_unlock();
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index c43592771126..52c2f063529f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -805,10 +805,17 @@ int esp6_input_done2(struct sk_buff *skb, int err)
if (x->encap) {
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int offset = skb_network_offset(skb) + sizeof(*ip6h);
struct xfrm_encap_tmpl *encap = x->encap;
- struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len);
- struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len);
- __be16 source;
+ u8 nexthdr = ip6h->nexthdr;
+ __be16 frag_off, source;
+ struct udphdr *uh;
+ struct tcphdr *th;
+
+ offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+ uh = (void *)(skb->data + offset);
+ th = (void *)(skb->data + offset);
+ hdr_len += offset;
switch (x->encap->encap_type) {
case TCP_ENCAP_ESPINTCP:
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 55addea1948f..1ca516fb30e1 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -395,3 +395,4 @@ module_exit(esp6_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP);
+MODULE_DESCRIPTION("IPV6 GSO/GRO offload support");
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 091f94184dc1..430518ae26fa 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -224,3 +224,4 @@ module_init(fou6_init);
module_exit(fou6_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Foo over UDP (IPv6)");
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index fc5000370030..9df8737ae0d3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -566,7 +566,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
- sk->sk_mark = mark;
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -583,6 +582,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.flowi6_oif = np->ucast_oif;
ipcm6_init_sk(&ipc6, np);
+ ipc6.sockc.mark = mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
@@ -751,7 +751,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
sk = icmpv6_xmit_lock(net);
if (!sk)
goto out_bh_enable;
- sk->sk_mark = mark;
np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
@@ -779,6 +778,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
ipcm6_init_sk(&ipc6, np);
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ ipc6.sockc.mark = mark;
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
skb->len + sizeof(struct icmp6hdr),
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 257d2b681246..36c58aa257e8 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -120,3 +120,4 @@ module_init(ila_init);
module_exit(ila_fini);
MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6: Identifier Locator Addressing (ILA)");
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index ce4fbba4acce..73bb047e6037 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -535,7 +535,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
{
- int uninitialized_var(err);
+ int err;
struct net *net = sock_net(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_flowlabel_req freq;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 781ca8c07a0d..3a57fb9ce049 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -127,6 +127,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
gre_proto == htons(ETH_P_ERSPAN2)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
+ struct net_device *ndev;
for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
if (!ipv6_addr_equal(local, &t->parms.laddr) ||
@@ -238,9 +239,9 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (t && t->dev->flags & IFF_UP)
return t;
- dev = ign->fb_tunnel_dev;
- if (dev && dev->flags & IFF_UP)
- return netdev_priv(dev);
+ ndev = READ_ONCE(ign->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
@@ -413,6 +414,8 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
+ if (ign->fb_tunnel_dev == dev)
+ WRITE_ONCE(ign->fb_tunnel_dev, NULL);
dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
@@ -1559,17 +1562,18 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
static int __net_init ip6gre_init_net(struct net *net)
{
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct net_device *ndev;
int err;
if (!net_has_fallback_tunnels(net))
return 0;
- ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
- NET_NAME_UNKNOWN,
- ip6gre_tunnel_setup);
- if (!ign->fb_tunnel_dev) {
+ ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
+ NET_NAME_UNKNOWN, ip6gre_tunnel_setup);
+ if (!ndev) {
err = -ENOMEM;
goto err_alloc_dev;
}
+ ign->fb_tunnel_dev = ndev;
dev_net_set(ign->fb_tunnel_dev, net);
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
@@ -1589,7 +1593,7 @@ static int __net_init ip6gre_init_net(struct net *net)
return 0;
err_reg_dev:
- free_netdev(ign->fb_tunnel_dev);
+ free_netdev(ndev);
err_alloc_dev:
return err;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 821d96c720b9..a18c378ca5f4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1846,6 +1846,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
static void ip6_tnl_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &ip6_tnl_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6_dev_free;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 1147f647b9a0..0d964160a9dd 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -905,6 +905,7 @@ static const struct net_device_ops vti6_netdev_ops = {
static void vti6_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &vti6_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = vti6_dev_free;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 20576e87a5f7..76f9e41859a2 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -240,6 +240,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
fl6_free_socklist(sk);
__ipv6_sock_mc_close(sk);
+ __ipv6_sock_ac_close(sk);
/*
* Sock is moving from IPv6 to IPv4 (sk_prot), so
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7e12d2114158..8cd2782a31e4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2615,6 +2615,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
idev->mc_list = i->next;
write_unlock_bh(&idev->lock);
+ ip6_mc_clear_src(i);
ma_put(i);
write_lock_bh(&idev->lock);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 0594131fa46d..262bb51a2d99 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -127,7 +127,7 @@ config IP6_NF_MATCH_HL
tristate '"hl" hoplimit match support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MATCH_HL
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MATCH_HL.
@@ -153,7 +153,7 @@ config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
depends on IP6_NF_MANGLE || IP6_NF_RAW
- ---help---
+ help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -183,7 +183,7 @@ config IP6_NF_TARGET_HL
tristate '"HL" hoplimit target support'
depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
select NETFILTER_XT_TARGET_HL
- ---help---
+ help
This is a backwards-compatible option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_HL.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e27393498ecb..e96a431549bc 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1807,11 +1807,22 @@ out_free:
return ret;
}
+void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops)
+{
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+}
+
+void ip6t_unregister_table_exit(struct net *net, struct xt_table *table)
+{
+ __ip6t_unregister_table(net, table);
+}
+
void ip6t_unregister_table(struct net *net, struct xt_table *table,
const struct nf_hook_ops *ops)
{
if (ops)
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ ip6t_unregister_table_pre_exit(net, table, ops);
__ip6t_unregister_table(net, table);
}
@@ -1969,6 +1980,8 @@ static void __exit ip6_tables_fini(void)
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
+EXPORT_SYMBOL(ip6t_unregister_table_pre_exit);
+EXPORT_SYMBOL(ip6t_unregister_table_exit);
EXPORT_SYMBOL(ip6t_do_table);
module_init(ip6_tables_init);
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index fd1f52a21bf1..d51d0c3e5fe9 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -121,3 +121,4 @@ module_exit(synproxy_tg6_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Intercept IPv6 TCP connections and establish them using syncookies");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 32667f5d5a33..88337b51ffbf 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -73,16 +73,24 @@ static int __net_init ip6table_filter_net_init(struct net *net)
return 0;
}
+static void __net_exit ip6table_filter_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_filter)
+ ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_filter,
+ filter_ops);
+}
+
static void __net_exit ip6table_filter_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_filter)
return;
- ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_filter);
net->ipv6.ip6table_filter = NULL;
}
static struct pernet_operations ip6table_filter_net_ops = {
.init = ip6table_filter_net_init,
+ .pre_exit = ip6table_filter_net_pre_exit,
.exit = ip6table_filter_net_exit,
};
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 070afb97fa2b..1a2748611e00 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -93,16 +93,24 @@ static int __net_init ip6table_mangle_table_init(struct net *net)
return ret;
}
+static void __net_exit ip6table_mangle_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_mangle)
+ ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_mangle,
+ mangle_ops);
+}
+
static void __net_exit ip6table_mangle_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_mangle)
return;
- ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_mangle);
net->ipv6.ip6table_mangle = NULL;
}
static struct pernet_operations ip6table_mangle_net_ops = {
+ .pre_exit = ip6table_mangle_net_pre_exit,
.exit = ip6table_mangle_net_exit,
};
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 0f4875952efc..0a23265e3caa 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -114,16 +114,22 @@ static int __net_init ip6table_nat_table_init(struct net *net)
return ret;
}
+static void __net_exit ip6table_nat_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_nat)
+ ip6t_nat_unregister_lookups(net);
+}
+
static void __net_exit ip6table_nat_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_nat)
return;
- ip6t_nat_unregister_lookups(net);
- ip6t_unregister_table(net, net->ipv6.ip6table_nat, NULL);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_nat);
net->ipv6.ip6table_nat = NULL;
}
static struct pernet_operations ip6table_nat_net_ops = {
+ .pre_exit = ip6table_nat_net_pre_exit,
.exit = ip6table_nat_net_exit,
};
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index a22100b1cf2c..8f9e742226f7 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -66,15 +66,23 @@ static int __net_init ip6table_raw_table_init(struct net *net)
return ret;
}
+static void __net_exit ip6table_raw_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_raw)
+ ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_raw,
+ rawtable_ops);
+}
+
static void __net_exit ip6table_raw_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_raw)
return;
- ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_raw);
net->ipv6.ip6table_raw = NULL;
}
static struct pernet_operations ip6table_raw_net_ops = {
+ .pre_exit = ip6table_raw_net_pre_exit,
.exit = ip6table_raw_net_exit,
};
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index a74335fe2bd9..5e8c48fed032 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -61,15 +61,23 @@ static int __net_init ip6table_security_table_init(struct net *net)
return ret;
}
+static void __net_exit ip6table_security_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_security)
+ ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_security,
+ sectbl_ops);
+}
+
static void __net_exit ip6table_security_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_security)
return;
- ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_security);
net->ipv6.ip6table_security = NULL;
}
static struct pernet_operations ip6table_security_net_ops = {
+ .pre_exit = ip6table_security_net_pre_exit,
.exit = ip6table_security_net_exit,
};
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index a8566ee12e83..667b8af2546a 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -35,3 +35,4 @@ module_exit(nf_flow_ipv6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
+MODULE_DESCRIPTION("Netfilter flow table IPv6 module");
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index b9df879c48d3..6fd54744cbc3 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -97,7 +97,7 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
const struct net_device *indev)
{
- __be16 uninitialized_var(dport), uninitialized_var(sport);
+ __be16 dport, sport;
const struct in6_addr *daddr = NULL, *saddr = NULL;
struct ipv6hdr *iph = ipv6_hdr(skb);
struct sk_buff *data_skb = NULL;
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 2af32200507d..8b5193efb1f1 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -105,3 +105,4 @@ module_exit(nft_dup_ipv6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup");
+MODULE_DESCRIPTION("IPv6 nftables packet duplication support");
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 7ece86afd079..e204163c7036 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -255,3 +255,4 @@ module_exit(nft_fib6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
+MODULE_DESCRIPTION("nftables fib / ipv6 route lookup support");
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 680a28ce29fd..c1098a1968e1 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -72,3 +72,4 @@ module_exit(nft_reject_ipv6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
+MODULE_DESCRIPTION("IPv6 packet rejection for nftables");
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 82cbb46a2a4f..4c36bd0c7930 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -431,9 +431,12 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
struct fib6_info *sibling, *next_sibling;
struct fib6_info *match = res->f6i;
- if ((!match->fib6_nsiblings && !match->nh) || have_oif_match)
+ if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
+ if (match->nh && have_oif_match && res->nh)
+ return;
+
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
@@ -3402,7 +3405,7 @@ static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
if ((flags & RTF_REJECT) ||
(dev && (dev->flags & IFF_LOOPBACK) &&
!(addr_type & IPV6_ADDR_LOOPBACK) &&
- !(flags & RTF_LOCAL)))
+ !(flags & (RTF_ANYCAST | RTF_LOCAL))))
return true;
return false;
@@ -3682,14 +3685,14 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->fib6_src.plen = cfg->fc_src_len;
#endif
if (nh) {
- if (!nexthop_get(nh)) {
- NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
- goto out;
- }
if (rt->fib6_src.plen) {
NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
goto out;
}
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ goto out;
+ }
rt->nh = nh;
fib6_nh = nexthop_fib6_nh(rt->nh);
} else {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1fbb4dfbb191..5e2c34c0ac97 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1421,6 +1421,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->netdev_ops = &ipip6_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ipip6_dev_free;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7d4151747340..a8d74f44056a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
int dif, int sdif, struct udp_hslot *hslot2,
struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *sk, *result, *reuseport_result;
int score, badness;
u32 hash = 0;
@@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
+ reuseport_result = NULL;
+
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
+ reuseport_result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (reuseport_result && !reuseport_has_conns(sk, false))
+ return reuseport_result;
}
- result = sk;
+
+ result = reuseport_result ? : sk;
badness = score;
}
}
@@ -643,7 +646,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
*/
- if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
if (up->pcrlen == 0) { /* full coverage was set */
net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 19250a0c85d3..cd2e468852e7 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -105,7 +105,7 @@ static LIST_HEAD(iucv_task_queue);
* The tasklet for fast delivery of iucv interrupts.
*/
static void iucv_tasklet_fn(unsigned long);
-static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_fn,0);
+static DECLARE_TASKLET_OLD(iucv_tasklet, iucv_tasklet_fn);
/*
* Queue of interrupt buffers for delivery via a work queue
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
index bf7e970fad65..16f39f2565d9 100644
--- a/net/kcm/Kconfig
+++ b/net/kcm/Kconfig
@@ -5,7 +5,7 @@ config AF_KCM
depends on INET
select BPF_SYSCALL
select STREAM_PARSER
- ---help---
+ help
KCM (Kernel Connection Multiplexor) sockets provide a method
for multiplexing messages of a message based application
protocol over kernel connectons (e.g. TCP connections).
diff --git a/net/key/af_key.c b/net/key/af_key.c
index b67ed3a8486c..a915bc86620a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1849,6 +1849,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
+ if ((xfilter->sadb_x_filter_splen >=
+ (sizeof(xfrm_address_t) << 3)) ||
+ (xfilter->sadb_x_filter_dplen >=
+ (sizeof(xfrm_address_t) << 3))) {
+ mutex_unlock(&pfk->dump_lock);
+ return -EINVAL;
+ }
filter = kmalloc(sizeof(*filter), GFP_KERNEL);
if (filter == NULL) {
mutex_unlock(&pfk->dump_lock);
@@ -2400,7 +2407,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
return err;
}
- xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
+ xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN,
pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
1, &err);
security_xfrm_policy_free(pol_ctx);
@@ -2651,7 +2658,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
return -EINVAL;
delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
- xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
+ xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN,
dir, pol->sadb_x_policy_id, delete, &err);
if (xp == NULL)
return -ENOENT;
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index 655e0646895b..b7856748e960 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -8,7 +8,7 @@ menuconfig L2TP
depends on (IPV6 || IPV6=n)
depends on INET
select NET_UDP_TUNNEL
- ---help---
+ help
Layer Two Tunneling Protocol
From RFC 2661 <http://www.ietf.org/rfc/rfc2661.txt>.
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 6d7ef78c88af..6434d17e6e8e 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1028,6 +1028,7 @@ static void l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->ignore_df = 1;
+ skb_dst_drop(skb);
#if IS_ENABLED(CONFIG_IPV6)
if (l2tp_sk_is_v6(tunnel->sock))
error = inet6_csk_xmit(tunnel->sock, skb, NULL);
@@ -1099,10 +1100,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
goto out_unlock;
}
- /* Get routing info from the tunnel socket */
- skb_dst_drop(skb);
- skb_dst_set(skb, sk_dst_check(sk, 0));
-
inet = inet_sk(sk);
fl = &inet->cork.fl;
switch (tunnel->encap) {
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
index de186dff8f63..2b2861e1fb7d 100644
--- a/net/l3mdev/Kconfig
+++ b/net/l3mdev/Kconfig
@@ -6,6 +6,6 @@
config NET_L3_MASTER_DEV
bool "L3 Master device support"
depends on INET || IPV6
- ---help---
+ help
This module provides glue between core networking code and device
drivers to support L3 master devices like VRF.
diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig
index 5b50e8d64f26..da87b47f0dff 100644
--- a/net/lapb/Kconfig
+++ b/net/lapb/Kconfig
@@ -5,7 +5,7 @@
config LAPB
tristate "LAPB Data Link Driver"
- ---help---
+ help
Link Access Procedure, Balanced (LAPB) is the data link layer (i.e.
the lower) part of the X.25 protocol. It offers a reliable
connection service to exchange data frames with one other host, and
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 54fb8d452a7b..6e53e43c1907 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -273,6 +273,10 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
if (!sock_flag(sk, SOCK_ZAPPED))
goto out;
+ if (!addr->sllc_arphrd)
+ addr->sllc_arphrd = ARPHRD_ETHER;
+ if (addr->sllc_arphrd != ARPHRD_ETHER)
+ goto out;
rc = -ENODEV;
if (sk->sk_bound_dev_if) {
llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
@@ -328,7 +332,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
goto out;
rc = -EAFNOSUPPORT;
- if (unlikely(addr->sllc_family != AF_LLC))
+ if (!addr->sllc_arphrd)
+ addr->sllc_arphrd = ARPHRD_ETHER;
+ if (unlikely(addr->sllc_family != AF_LLC || addr->sllc_arphrd != ARPHRD_ETHER))
goto out;
dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
rc = -ENODEV;
@@ -336,8 +342,6 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
if (sk->sk_bound_dev_if) {
llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
if (llc->dev) {
- if (!addr->sllc_arphrd)
- addr->sllc_arphrd = llc->dev->type;
if (is_zero_ether_addr(addr->sllc_mac))
memcpy(addr->sllc_mac, llc->dev->dev_addr,
IFHWADDRLEN);
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 0c93b1b7a826..cd9a9bd242ba 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -9,7 +9,7 @@ config MAC80211
select CRYPTO_GCM
select CRYPTO_CMAC
select CRC32
- ---help---
+ help
This option enables the hardware independent IEEE 802.11
networking stack.
@@ -25,14 +25,14 @@ config MAC80211_RC_MINSTREL
bool "Minstrel" if EXPERT
select MAC80211_HAS_RC
default y
- ---help---
+ help
This option enables the 'minstrel' TX rate control algorithm
choice
prompt "Default rate control algorithm"
depends on MAC80211_HAS_RC
default MAC80211_RC_DEFAULT_MINSTREL
- ---help---
+ help
This option selects the default rate control algorithm
mac80211 will use. Note that this default can still be
overridden through the ieee80211_default_rc_algo module
@@ -41,7 +41,7 @@ choice
config MAC80211_RC_DEFAULT_MINSTREL
bool "Minstrel"
depends on MAC80211_RC_MINSTREL
- ---help---
+ help
Select Minstrel as the default rate control algorithm.
@@ -60,7 +60,7 @@ comment "Some wireless drivers require a rate control algorithm"
config MAC80211_MESH
bool "Enable mac80211 mesh networking support"
depends on MAC80211
- ---help---
+ help
Select this option to enable 802.11 mesh operation in mac80211
drivers that support it. 802.11 mesh connects multiple stations
over (possibly multi-hop) wireless links to form a single logical
@@ -71,14 +71,14 @@ config MAC80211_LEDS
depends on MAC80211
depends on LEDS_CLASS
select LEDS_TRIGGERS
- ---help---
+ help
This option enables a few LED triggers for different
packet receive/transmit events.
config MAC80211_DEBUGFS
bool "Export mac80211 internals in DebugFS"
depends on MAC80211 && DEBUG_FS
- ---help---
+ help
Select this to see extensive information about
the internal state of mac80211 in debugfs.
@@ -87,7 +87,7 @@ config MAC80211_DEBUGFS
config MAC80211_MESSAGE_TRACING
bool "Trace all mac80211 debug messages"
depends on MAC80211
- ---help---
+ help
Select this option to have mac80211 register the
mac80211_msg trace subsystem with tracepoints to
collect all debugging messages, independent of
@@ -100,13 +100,13 @@ config MAC80211_MESSAGE_TRACING
menuconfig MAC80211_DEBUG_MENU
bool "Select mac80211 debugging features"
depends on MAC80211
- ---help---
+ help
This option collects various mac80211 debug settings.
config MAC80211_NOINLINE
bool "Do not inline TX/RX handlers"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
This option affects code generation in mac80211, when
selected some functions are marked "noinline" to allow
easier debugging of problems in the transmit and receive
@@ -122,7 +122,7 @@ config MAC80211_NOINLINE
config MAC80211_VERBOSE_DEBUG
bool "Verbose debugging output"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
many debugging messages. It should not be selected
on production systems as some of the messages are
@@ -133,7 +133,7 @@ config MAC80211_VERBOSE_DEBUG
config MAC80211_MLME_DEBUG
bool "Verbose managed MLME output"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
debugging messages for the managed-mode MLME. It
should not be selected on production systems as some
@@ -144,7 +144,7 @@ config MAC80211_MLME_DEBUG
config MAC80211_STA_DEBUG
bool "Verbose station debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
debugging messages for station addition/removal.
@@ -153,7 +153,7 @@ config MAC80211_STA_DEBUG
config MAC80211_HT_DEBUG
bool "Verbose HT debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
This option enables 802.11n High Throughput features
debug tracing output.
@@ -165,7 +165,7 @@ config MAC80211_HT_DEBUG
config MAC80211_OCB_DEBUG
bool "Verbose OCB debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
very verbose OCB debugging messages. It should not
be selected on production systems as those messages
@@ -176,7 +176,7 @@ config MAC80211_OCB_DEBUG
config MAC80211_IBSS_DEBUG
bool "Verbose IBSS debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
very verbose IBSS debugging messages. It should not
be selected on production systems as those messages
@@ -187,7 +187,7 @@ config MAC80211_IBSS_DEBUG
config MAC80211_PS_DEBUG
bool "Verbose powersave mode debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose power save mode debugging messages (when mac80211
is an AP and has power saving stations.)
@@ -200,7 +200,7 @@ config MAC80211_MPL_DEBUG
bool "Verbose mesh peer link debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose mesh peer link debugging messages (when mac80211
is taking part in a mesh network).
@@ -213,7 +213,7 @@ config MAC80211_MPATH_DEBUG
bool "Verbose mesh path debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose mesh path selection debugging messages (when mac80211
is taking part in a mesh network).
@@ -226,7 +226,7 @@ config MAC80211_MHWMP_DEBUG
bool "Verbose mesh HWMP routing debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose mesh routing (HWMP) debugging messages (when mac80211
is taking part in a mesh network).
@@ -239,7 +239,7 @@ config MAC80211_MESH_SYNC_DEBUG
bool "Verbose mesh synchronization debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very verbose mesh
synchronization debugging messages (when mac80211 is taking part in a
mesh network).
@@ -250,7 +250,7 @@ config MAC80211_MESH_CSA_DEBUG
bool "Verbose mesh channel switch debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very verbose mesh
channel switch debugging messages (when mac80211 is taking part in a
mesh network).
@@ -261,7 +261,7 @@ config MAC80211_MESH_PS_DEBUG
bool "Verbose mesh powersave debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very verbose mesh
powersave debugging messages (when mac80211 is taking part in a
mesh network).
@@ -271,7 +271,7 @@ config MAC80211_MESH_PS_DEBUG
config MAC80211_TDLS_DEBUG
bool "Verbose TDLS debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose TDLS selection debugging messages (when mac80211
is a TDLS STA).
@@ -284,7 +284,7 @@ config MAC80211_DEBUG_COUNTERS
bool "Extra statistics for TX/RX debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_DEBUGFS
- ---help---
+ help
Selecting this option causes mac80211 to keep additional
and very verbose statistics about TX and RX handler use
as well as a few selected dot11 counters. These will be
@@ -298,7 +298,7 @@ config MAC80211_DEBUG_COUNTERS
config MAC80211_STA_HASH_MAX_SIZE
int "Station hash table maximum size" if MAC80211_DEBUG_MENU
default 0
- ---help---
+ help
Setting this option to a low value (e.g. 4) allows testing the
hash table with collisions relatively deterministically (just
connect more stations than the number selected here.)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9b360544ad6f..1079a07e43e4 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2166,6 +2166,7 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev)
ieee80211_stop_mesh(sdata);
mutex_lock(&sdata->local->mtx);
ieee80211_vif_release_channel(sdata);
+ kfree(sdata->u.mesh.ie);
mutex_unlock(&sdata->local->mtx);
return 0;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5f1ca25b6c97..e88beb3ff6db 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -617,6 +617,19 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
+ struct ieee80211_supported_band *sband;
+ const struct ieee80211_sband_iftype_data *iftd;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return -EINVAL;
+
+ iftd = ieee80211_get_sband_iftype_data(sband,
+ NL80211_IFTYPE_MESH_POINT);
+ /* The device doesn't support HE in mesh mode or at all */
+ if (!iftd)
+ return 0;
+
ieee80211_ie_build_he_6ghz_cap(sdata, skb);
return 0;
}
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index aa5150929996..02cde0fd08fe 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1105,11 +1105,8 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
ttl, lifetime, 0, ifmsh->preq_id++, sdata);
spin_lock_bh(&mpath->state_lock);
- if (mpath->flags & MESH_PATH_DELETED) {
- spin_unlock_bh(&mpath->state_lock);
- goto enddiscovery;
- }
- mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout);
+ if (!(mpath->flags & MESH_PATH_DELETED))
+ mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout);
spin_unlock_bh(&mpath->state_lock);
enddiscovery:
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 117519bf33d6..aca608ae313f 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -521,6 +521,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl,
del_timer_sync(&mpath->timer);
atomic_dec(&sdata->u.mesh.mpaths);
atomic_dec(&tbl->entries);
+ mesh_path_flush_pending(mpath);
kfree_rcu(mpath, rcu);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5820ef02a587..b2a9d47cf86d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -167,6 +167,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
ret = IEEE80211_STA_DISABLE_HT |
IEEE80211_STA_DISABLE_VHT |
IEEE80211_STA_DISABLE_HE;
+ else
+ ret = 0;
vht_chandef = *chandef;
goto out;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 21854a61a2b7..5c5af4b5fc08 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2396,6 +2396,7 @@ static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx)
static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
{
+ struct ieee80211_hdr *hdr = (void *)rx->skb->data;
struct sk_buff *skb = rx->skb;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
@@ -2406,6 +2407,31 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
if (status->flag & RX_FLAG_DECRYPTED)
return 0;
+ /* check mesh EAPOL frames first */
+ if (unlikely(rx->sta && ieee80211_vif_is_mesh(&rx->sdata->vif) &&
+ ieee80211_is_data(fc))) {
+ struct ieee80211s_hdr *mesh_hdr;
+ u16 hdr_len = ieee80211_hdrlen(fc);
+ u16 ethertype_offset;
+ __be16 ethertype;
+
+ if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr))
+ goto drop_check;
+
+ /* make sure fixed part of mesh header is there, also checks skb len */
+ if (!pskb_may_pull(rx->skb, hdr_len + 6))
+ goto drop_check;
+
+ mesh_hdr = (struct ieee80211s_hdr *)(skb->data + hdr_len);
+ ethertype_offset = hdr_len + ieee80211_get_mesh_hdrlen(mesh_hdr) +
+ sizeof(rfc1042_header);
+
+ if (skb_copy_bits(rx->skb, ethertype_offset, &ethertype, 2) == 0 &&
+ ethertype == rx->sdata->control_port_protocol)
+ return 0;
+ }
+
+drop_check:
/* Drop unencrypted frames if key is set. */
if (unlikely(!ieee80211_has_protected(fc) &&
!ieee80211_is_any_nullfunc(fc) &&
@@ -4694,7 +4720,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
* rate_idx is MCS index, which can be [0-76]
* as documented on:
*
- * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
+ * https://wireless.wiki.kernel.org/en/developers/Documentation/ieee80211/802.11n
*
* Anything else would be some sort of driver or
* hardware error. The driver should catch hardware
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index cd8487bc6fc2..af4cc5fb678e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1923,9 +1923,7 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
if (sta) {
tx_pending = atomic_sub_return(tx_airtime,
&sta->airtime[ac].aql_tx_pending);
- if (WARN_ONCE(tx_pending < 0,
- "STA %pM AC %d txq pending airtime underflow: %u, %u",
- sta->addr, ac, tx_pending, tx_airtime))
+ if (tx_pending < 0)
atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending,
tx_pending, 0);
}
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 7b1bacac39c6..cbc40b358ba2 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -639,11 +639,23 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_hdr *hdr = (void *)skb->data;
+ __be16 ethertype = 0;
+
+ if (skb->len >= ETH_HLEN && skb->protocol == cpu_to_be16(ETH_P_802_3))
+ skb_copy_bits(skb, 2 * ETH_ALEN, &ethertype, ETH_TLEN);
rcu_read_lock();
sdata = ieee80211_sdata_from_skb(local, skb);
if (sdata) {
- if (ieee80211_is_any_nullfunc(hdr->frame_control))
+ if (ethertype == sdata->control_port_protocol ||
+ ethertype == cpu_to_be16(ETH_P_PREAUTH))
+ cfg80211_control_port_tx_status(&sdata->wdev,
+ cookie,
+ skb->data,
+ skb->len,
+ acked,
+ GFP_ATOMIC);
+ else if (ieee80211_is_any_nullfunc(hdr->frame_control))
cfg80211_probe_status(sdata->dev, hdr->addr1,
cookie, acked,
info->status.ack_signal,
@@ -654,12 +666,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
skb->data, skb->len,
acked, GFP_ATOMIC);
else
- cfg80211_control_port_tx_status(&sdata->wdev,
- cookie,
- skb->data,
- skb->len,
- acked,
- GFP_ATOMIC);
+ pr_warn("Unknown status report in ack skb\n");
+
}
rcu_read_unlock();
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e9ce658141f5..3529d1368068 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3996,6 +3996,9 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
skb_list_walk_safe(skb, skb, next) {
skb_mark_not_on_list(skb);
+ if (skb->protocol == sdata->control_port_protocol)
+ ctrl_flags |= IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP;
+
skb = ieee80211_build_hdr(sdata, skb, info_flags,
sta, ctrl_flags, cookie);
if (IS_ERR(skb)) {
@@ -4206,7 +4209,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
(!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER)))
ra = sdata->u.mgd.bssid;
- if (!is_valid_ether_addr(ra))
+ if (is_zero_ether_addr(ra))
goto out_free;
multicast = is_multicast_ether_addr(ra);
@@ -4227,11 +4230,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
goto out_free;
+ memset(info, 0, sizeof(*info));
+
if (unlikely(!multicast && skb->sk &&
skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
- ieee80211_store_ack_skb(local, skb, &info->flags, NULL);
-
- memset(info, 0, sizeof(*info));
+ info->ack_frame_id = ieee80211_store_ack_skb(local, skb,
+ &info->flags, NULL);
if (unlikely(sdata->control_port_protocol == ehdr->h_proto)) {
if (sdata->control_port_no_encrypt)
@@ -5371,7 +5375,8 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
return -EINVAL;
if (proto == sdata->control_port_protocol)
- ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
+ ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO |
+ IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP;
if (unencrypted)
flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 21c94094a699..dd9f5c7a1ade 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2878,6 +2878,10 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!iftd))
return;
+ /* Check for device HE 6 GHz capability before adding element */
+ if (!iftd->he_6ghz_capa.capa)
+ return;
+
cap = le16_to_cpu(iftd->he_6ghz_capa.capa);
cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS;
diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig
index 742624e4f7bb..901167b1e6f5 100644
--- a/net/mac802154/Kconfig
+++ b/net/mac802154/Kconfig
@@ -8,7 +8,7 @@ config MAC802154
select CRYPTO_CCM
select CRYPTO_CTR
select CRYPTO_AES
- ---help---
+ help
This option enables the hardware independent IEEE 802.15.4
networking stack for SoftMAC devices (the ones implementing
only PHY level of IEEE 802.15.4 standard).
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index d1ad69b7942a..d672ab72ab12 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -6,7 +6,7 @@
menuconfig MPLS
bool "MultiProtocol Label Switching"
default n
- ---help---
+ help
MultiProtocol Label Switching routes packets through logical
circuits. Originally conceived as a way of routing packets at
hardware speeds (before hardware was capable of routing ipv4 packets),
@@ -27,13 +27,13 @@ config MPLS_ROUTING
tristate "MPLS: routing support"
depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
depends on PROC_SYSCTL
- ---help---
+ help
Add support for forwarding of mpls packets.
config MPLS_IPTUNNEL
tristate "MPLS: IP over MPLS tunnel support"
depends on LWTUNNEL && MPLS_ROUTING
- ---help---
+ help
mpls ip tunnel support.
endif # MPLS
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index 3d980713a9e2..82bd2b54d741 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -32,11 +32,8 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
{
__be32 mptcp_hashed_key[SHA256_DIGEST_WORDS];
__be64 input = cpu_to_be64(key);
- struct sha256_state state;
- sha256_init(&state);
- sha256_update(&state, (__force u8 *)&input, sizeof(input));
- sha256_final(&state, (u8 *)mptcp_hashed_key);
+ sha256((__force u8 *)&input, sizeof(input), (u8 *)mptcp_hashed_key);
if (token)
*token = be32_to_cpu(mptcp_hashed_key[0]);
@@ -47,7 +44,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
{
u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE];
- struct sha256_state state;
u8 key1be[8];
u8 key2be[8];
int i;
@@ -67,13 +63,10 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
memcpy(&input[SHA256_BLOCK_SIZE], msg, len);
- sha256_init(&state);
- sha256_update(&state, input, SHA256_BLOCK_SIZE + len);
-
/* emit sha256(K1 || msg) on the second input block, so we can
* reuse 'input' for the last hashing
*/
- sha256_final(&state, &input[SHA256_BLOCK_SIZE]);
+ sha256(input, SHA256_BLOCK_SIZE + len, &input[SHA256_BLOCK_SIZE]);
/* Prepare second part of hmac */
memset(input, 0x5C, SHA256_BLOCK_SIZE);
@@ -82,9 +75,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
for (i = 0; i < 8; i++)
input[i + 8] ^= key2be[i];
- sha256_init(&state);
- sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE);
- sha256_final(&state, (u8 *)hmac);
+ sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac);
}
#ifdef CONFIG_MPTCP_HMAC_TEST
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 01f1f4cf4902..8f940be42f98 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -273,6 +273,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
break;
+ ptr++;
+
mp_opt->rm_addr = 1;
mp_opt->rm_id = *ptr++;
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
@@ -334,9 +336,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
*/
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) {
- pr_debug("local_key=%llu", subflow->local_key);
opts->suboptions = OPTION_MPTCP_MPC_SYN;
- opts->sndr_key = subflow->local_key;
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@@ -449,9 +449,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
}
static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
- struct mptcp_ext *ext)
+ struct sk_buff *skb, struct mptcp_ext *ext)
{
- if (!ext->use_map) {
+ if (!ext->use_map || !skb->len) {
/* RFC6824 requires a DSS mapping with specific values
* if DATA_FIN is set but no data payload is mapped
*/
@@ -503,7 +503,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy = *mpext;
if (skb && tcp_fin && subflow->data_fin_tx_enable)
- mptcp_write_data_fin(subflow, &opts->ext_copy);
+ mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
ret = true;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 14b253d10ccf..c0abe738e7d3 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -374,6 +374,27 @@ void mptcp_subflow_eof(struct sock *sk)
sock_hold(sk);
}
+static void mptcp_check_for_eof(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ int receivers = 0;
+
+ mptcp_for_each_subflow(msk, subflow)
+ receivers += !subflow->rx_eof;
+
+ if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+ /* hopefully temporary hack: propagate shutdown status
+ * to msk, when all subflows agree on it
+ */
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+
+ smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+ set_bit(MPTCP_DATA_READY, &msk->flags);
+ sk->sk_data_ready(sk);
+ }
+}
+
static void mptcp_stop_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1011,6 +1032,9 @@ fallback:
break;
}
+ if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+ mptcp_check_for_eof(msk);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
@@ -1148,27 +1172,6 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}
-static void mptcp_check_for_eof(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- int receivers = 0;
-
- mptcp_for_each_subflow(msk, subflow)
- receivers += !subflow->rx_eof;
-
- if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
- /* hopefully temporary hack: propagate shutdown status
- * to msk, when all subflows agree on it
- */
- sk->sk_shutdown |= RCV_SHUTDOWN;
-
- smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
- sk->sk_data_ready(sk);
- }
-}
-
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -1830,7 +1833,7 @@ do_connect:
/* on successful connect, the msk state will be moved to established by
* subflow_finish_connect()
*/
- if (!err || err == EINPROGRESS)
+ if (!err || err == -EINPROGRESS)
mptcp_copy_inaddrs(sock->sk, ssock->sk);
else
inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 809687d3f410..c6eeaf3e8dcb 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -135,8 +135,6 @@ static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
((nib & 0xF) << 8) | field);
}
-#define MPTCP_PM_MAX_ADDR 4
-
struct mptcp_addr_info {
sa_family_t family;
__be16 port;
@@ -234,10 +232,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (list_empty(&msk->rtx_queue))
- return NULL;
-
- return list_first_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
+ return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
struct mptcp_subflow_request_sock {
@@ -254,6 +249,7 @@ struct mptcp_subflow_request_sock {
u64 thmac;
u32 local_nonce;
u32 remote_nonce;
+ struct mptcp_sock *msk;
};
static inline struct mptcp_subflow_request_sock *
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 493b98a0825c..3838a0b3a21f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -69,6 +69,9 @@ static void subflow_req_destructor(struct request_sock *req)
pr_debug("subflow_req=%p", subflow_req);
+ if (subflow_req->msk)
+ sock_put((struct sock *)subflow_req->msk);
+
if (subflow_req->mp_capable)
mptcp_token_destroy_request(subflow_req->token);
tcp_request_sock_ops.destructor(req);
@@ -86,8 +89,8 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
-static bool subflow_token_join_request(struct request_sock *req,
- const struct sk_buff *skb)
+static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
+ const struct sk_buff *skb)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
u8 hmac[SHA256_DIGEST_SIZE];
@@ -97,13 +100,13 @@ static bool subflow_token_join_request(struct request_sock *req,
msk = mptcp_token_get_sock(subflow_req->token);
if (!msk) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
- return false;
+ return NULL;
}
local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
if (local_id < 0) {
sock_put((struct sock *)msk);
- return false;
+ return NULL;
}
subflow_req->local_id = local_id;
@@ -114,9 +117,7 @@ static bool subflow_token_join_request(struct request_sock *req,
subflow_req->remote_nonce, hmac);
subflow_req->thmac = get_unaligned_be64(hmac);
-
- sock_put((struct sock *)msk);
- return true;
+ return msk;
}
static void subflow_init_req(struct request_sock *req,
@@ -133,6 +134,7 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
+ subflow_req->msk = NULL;
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -166,12 +168,9 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->remote_id = mp_opt.join_id;
subflow_req->token = mp_opt.token;
subflow_req->remote_nonce = mp_opt.nonce;
- pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
- subflow_req->remote_nonce);
- if (!subflow_token_join_request(req, skb)) {
- subflow_req->mp_join = 0;
- // @@ need to trigger RST
- }
+ subflow_req->msk = subflow_token_join_request(req, skb);
+ pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
+ subflow_req->remote_nonce, subflow_req->msk);
}
}
@@ -354,10 +353,9 @@ static bool subflow_hmac_valid(const struct request_sock *req,
const struct mptcp_subflow_request_sock *subflow_req;
u8 hmac[SHA256_DIGEST_SIZE];
struct mptcp_sock *msk;
- bool ret;
subflow_req = mptcp_subflow_rsk(req);
- msk = mptcp_token_get_sock(subflow_req->token);
+ msk = subflow_req->msk;
if (!msk)
return false;
@@ -365,12 +363,7 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req->remote_nonce,
subflow_req->local_nonce, hmac);
- ret = true;
- if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN))
- ret = false;
-
- sock_put((struct sock *)msk);
- return ret;
+ return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
static void mptcp_sock_destruct(struct sock *sk)
@@ -393,6 +386,7 @@ static void mptcp_sock_destruct(struct sock *sk)
sock_orphan(sk);
}
+ mptcp_token_destroy(mptcp_sk(sk)->token);
inet_sock_destruct(sk);
}
@@ -437,22 +431,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
- bool fallback_is_fatal = false;
+ bool fallback, fallback_is_fatal;
struct sock *new_msk = NULL;
- bool fallback = false;
struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
- /* we need later a valid 'mp_capable' value even when options are not
- * parsed
+ /* After child creation we must look for 'mp_capable' even when options
+ * are not parsed
*/
mp_opt.mp_capable = 0;
- if (tcp_rsk(req)->is_mptcp == 0)
+
+ /* hopefully temporary handling for MP_JOIN+syncookie */
+ subflow_req = mptcp_subflow_rsk(req);
+ fallback_is_fatal = subflow_req->mp_join;
+ fallback = !tcp_rsk(req)->is_mptcp;
+ if (fallback)
goto create_child;
/* if the sk is MP_CAPABLE, we try to fetch the client key */
- subflow_req = mptcp_subflow_rsk(req);
if (subflow_req->mp_capable) {
if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
/* here we can receive and accept an in-window,
@@ -473,12 +470,11 @@ create_msk:
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- fallback_is_fatal = true;
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_join ||
!subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
- return NULL;
+ fallback = true;
}
}
@@ -521,10 +517,12 @@ create_child:
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
- owner = mptcp_token_get_sock(ctx->token);
+ owner = subflow_req->msk;
if (!owner)
goto dispose_child;
+ /* move the msk reference ownership to the subflow */
+ subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
if (!mptcp_finish_join(child))
goto dispose_child;
@@ -1052,8 +1050,10 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
err = tcp_set_ulp(sf->sk, "mptcp");
release_sock(sf->sk);
- if (err)
+ if (err) {
+ sock_release(sf);
return err;
+ }
/* the newly created socket really belongs to the owning MPTCP master
* socket, even if for additional subflows the allocation is performed
diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig
index 2f1e5756c03a..93309081f5a4 100644
--- a/net/ncsi/Kconfig
+++ b/net/ncsi/Kconfig
@@ -6,7 +6,7 @@
config NET_NCSI
bool "NCSI interface support"
depends on INET
- ---help---
+ help
This module provides NCSI (Network Controller Sideband Interface)
support. Enable this only if your system connects to a network
device via NCSI and the ethernet driver you're using supports
@@ -14,6 +14,6 @@ config NET_NCSI
config NCSI_OEM_CMD_GET_MAC
bool "Get NCSI OEM MAC Address"
depends on NET_NCSI
- ---help---
+ help
This allows to get MAC address from NCSI firmware and set them back to
controller.
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 3a3915d2e1ea..0ffe2b8723c4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -120,7 +120,7 @@ config NF_CONNTRACK_PROCFS
bool "Supply CT list in procfs (OBSOLETE)"
default y
depends on PROC_FS
- ---help---
+ help
This option enables for the list of known conntrack entries
to be shown in procfs under net/netfilter/nf_conntrack. This
is considered obsolete in favor of using the conntrack(8)
@@ -717,7 +717,7 @@ comment "Xtables combined modules"
config NETFILTER_XT_MARK
tristate 'nfmark target and match support'
default m if NETFILTER_ADVANCED=n
- ---help---
+ help
This option adds the "MARK" target and "mark" match.
Netfilter mark matching allows you to match packets based on the
@@ -733,7 +733,7 @@ config NETFILTER_XT_CONNMARK
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_MARK
- ---help---
+ help
This option adds the "CONNMARK" target and "connmark" match.
Netfilter allows you to store a mark value per connection (a.k.a.
@@ -760,7 +760,7 @@ config NETFILTER_XT_TARGET_AUDIT
tristate "AUDIT target support"
depends on AUDIT
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds a 'AUDIT' target, which can be used to create
audit records for packets dropped/accepted.
@@ -770,7 +770,7 @@ config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds a `CHECKSUM' target, which can be used in the iptables mangle
table to work around buggy DHCP clients in virtualized environments.
@@ -799,7 +799,7 @@ config NETFILTER_XT_TARGET_CONNMARK
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_XT_CONNMARK
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
@@ -848,7 +848,7 @@ config NETFILTER_XT_TARGET_HL
tristate '"HL" hoplimit target support'
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds the "HL" (for IPv6) and "TTL" (for IPv4)
targets, which enable the user to change the
hoplimit/time-to-live value of the IP header.
@@ -863,7 +863,7 @@ config NETFILTER_XT_TARGET_HMARK
tristate '"HMARK" target support'
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds the "HMARK" target.
The target allows you to create rules in the "raw" and "mangle" tables
@@ -925,7 +925,7 @@ config NETFILTER_XT_TARGET_MARK
tristate '"MARK" target support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MARK
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
@@ -933,7 +933,7 @@ config NETFILTER_XT_TARGET_MARK
config NETFILTER_XT_NAT
tristate '"SNAT and DNAT" targets support'
depends on NF_NAT
- ---help---
+ help
This option enables the SNAT and DNAT targets.
To compile it as a module, choose M here. If unsure, say N.
@@ -941,7 +941,7 @@ config NETFILTER_XT_NAT
config NETFILTER_XT_TARGET_NETMAP
tristate '"NETMAP" target support'
depends on NF_NAT
- ---help---
+ help
NETMAP is an implementation of static 1:1 NAT mapping of network
addresses. It maps the network address part, while keeping the host
address part intact.
@@ -991,7 +991,7 @@ config NETFILTER_XT_TARGET_REDIRECT
tristate "REDIRECT target support"
depends on NF_NAT
select NF_NAT_REDIRECT
- ---help---
+ help
REDIRECT is a special case of NAT: all incoming connections are
mapped onto the incoming interface's address, causing the packets to
come to the local machine instead of passing through. This is
@@ -1021,7 +1021,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES
select NF_DUP_IPV4
select NF_DUP_IPV6 if IP6_NF_IPTABLES
- ---help---
+ help
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
@@ -1073,7 +1073,7 @@ config NETFILTER_XT_TARGET_TCPMSS
tristate '"TCPMSS" target support'
depends on IPV6 || IPV6=n
default m if NETFILTER_ADVANCED=n
- ---help---
+ help
This option adds a `TCPMSS' target, which allows you to alter the
MSS value of TCP SYN packets, to control the maximum size for that
connection (usually limiting it to your outgoing interface's MTU
@@ -1111,7 +1111,7 @@ comment "Xtables matches"
config NETFILTER_XT_MATCH_ADDRTYPE
tristate '"addrtype" address type match support'
default m if NETFILTER_ADVANCED=n
- ---help---
+ help
This option allows you to match what routing thinks of an address,
eg. UNICAST, LOCAL, BROADCAST, ...
@@ -1132,7 +1132,7 @@ config NETFILTER_XT_MATCH_CGROUP
depends on NETFILTER_ADVANCED
depends on CGROUPS
select CGROUP_NET_CLASSID
- ---help---
+ help
Socket/process control group matching allows you to match locally
generated packets based on which net_cls control group processes
belong to.
@@ -1141,7 +1141,7 @@ config NETFILTER_XT_MATCH_CLUSTER
tristate '"cluster" match support'
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option allows you to build work-load-sharing clusters of
network servers/stateful firewalls without having a dedicated
load-balancing router/server/switch. Basically, this match returns
@@ -1179,7 +1179,7 @@ config NETFILTER_XT_MATCH_CONNLABEL
select NF_CONNTRACK_LABELS
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
- ---help---
+ help
This match allows you to test and assign userspace-defined labels names
to a connection. The kernel only stores bit values - mapping
names to bits is done by userspace.
@@ -1192,7 +1192,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_CONNCOUNT
- ---help---
+ help
This match allows you to match against the number of parallel
connections to a server per client IP address (or address block).
@@ -1201,7 +1201,7 @@ config NETFILTER_XT_MATCH_CONNMARK
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_XT_CONNMARK
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
@@ -1267,7 +1267,7 @@ config NETFILTER_XT_MATCH_DSCP
config NETFILTER_XT_MATCH_ECN
tristate '"ecn" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds an "ECN" match, which allows you to match against
the IPv4 and TCP header ECN fields.
@@ -1310,7 +1310,7 @@ config NETFILTER_XT_MATCH_HELPER
config NETFILTER_XT_MATCH_HL
tristate '"hl" hoplimit/TTL match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
HL matching allows you to match packets based on the hoplimit
in the IPv6 header, or the time-to-live field in the IPv4
header of the packet.
@@ -1327,7 +1327,7 @@ config NETFILTER_XT_MATCH_IPCOMP
config NETFILTER_XT_MATCH_IPRANGE
tristate '"iprange" address range match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds a "iprange" match, which allows you to match based on
an IP address range. (Normal iptables only matches on single addresses
with an optional mask.)
@@ -1348,7 +1348,7 @@ config NETFILTER_XT_MATCH_L2TP
tristate '"l2tp" match support'
depends on NETFILTER_ADVANCED
default L2TP
- ---help---
+ help
This option adds an "L2TP" match, which allows you to match against
L2TP protocol header fields.
@@ -1386,7 +1386,7 @@ config NETFILTER_XT_MATCH_MARK
tristate '"mark" match support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MARK
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
@@ -1428,7 +1428,7 @@ config NETFILTER_XT_MATCH_OSF
config NETFILTER_XT_MATCH_OWNER
tristate '"owner" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
Socket owner matching allows you to match locally-generated packets
based on who created the socket: the user or group. It is also
possible to check whether a socket actually exists.
@@ -1503,7 +1503,7 @@ config NETFILTER_XT_MATCH_REALM
config NETFILTER_XT_MATCH_RECENT
tristate '"recent" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
This match is used for creating one or many lists of recently
used addresses and then matching against that/those list(s).
@@ -1586,7 +1586,7 @@ config NETFILTER_XT_MATCH_TCPMSS
config NETFILTER_XT_MATCH_TIME
tristate '"time" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds a "time" match, which allows you to match based on
the packet arrival time (at the machine which netfilter is running)
on) or departure time/date (for locally generated packets).
@@ -1600,7 +1600,7 @@ config NETFILTER_XT_MATCH_TIME
config NETFILTER_XT_MATCH_U32
tristate '"u32" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
u32 allows you to extract quantities of up to 4 bytes from a packet,
AND them with specified masks, shift them by specified amounts and
test whether the results are in any of a set of specified ranges.
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 486959f70cf3..a8ce04a4bb72 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -326,7 +326,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
- kfree(map);
+ ip_set_free(map);
return -ENOMEM;
}
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 2310a316e0af..2c625e0f49ec 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -363,7 +363,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
- kfree(map);
+ ip_set_free(map);
return -ENOMEM;
}
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index e56ced66f202..7138e080def4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -274,7 +274,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
- kfree(map);
+ ip_set_free(map);
return -ENOMEM;
}
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 340cb955af25..56621d6bfd29 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -460,6 +460,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
+ if (align < ip_set_extensions[id].align)
+ align = ip_set_extensions[id].align;
len = ALIGN(len, ip_set_extensions[id].align);
set->offset[id] = len;
set->extensions |= ip_set_extensions[id].type;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 1ee43752d6d3..521e970be402 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -682,7 +682,7 @@ retry:
}
t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
if (!t->hregion) {
- kfree(t);
+ ip_set_free(t);
ret = -ENOMEM;
goto out;
}
@@ -1533,7 +1533,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
}
t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
if (!t->hregion) {
- kfree(t);
+ ip_set_free(t);
kfree(h);
return -ENOMEM;
}
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 5b672e05d758..2c1593089ede 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -6,7 +6,7 @@ menuconfig IP_VS
tristate "IP virtual server support"
depends on NET && INET && NETFILTER
depends on (NF_CONNTRACK || NF_CONNTRACK=n)
- ---help---
+ help
IP Virtual Server support will let you build a high-performance
virtual server based on cluster of two or more real servers. This
option must be enabled for at least one of the clustered computers
@@ -31,14 +31,14 @@ config IP_VS_IPV6
depends on IPV6 = y || IP_VS = IPV6
select IP6_NF_IPTABLES
select NF_DEFRAG_IPV6
- ---help---
+ help
Add IPv6 support to IPVS.
Say Y if unsure.
config IP_VS_DEBUG
bool "IP virtual server debugging"
- ---help---
+ help
Say Y here if you want to get additional messages useful in
debugging the IP virtual server code. You can change the debug
level in /proc/sys/net/ipv4/vs/debug_level
@@ -47,7 +47,7 @@ config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
range 8 20
default 12
- ---help---
+ help
The IPVS connection hash table uses the chaining scheme to handle
hash collisions. Using a big IPVS connection hash table will greatly
reduce conflicts when there are hundreds of thousands of connections
@@ -78,13 +78,13 @@ comment "IPVS transport protocol load balancing support"
config IP_VS_PROTO_TCP
bool "TCP load balancing support"
- ---help---
+ help
This option enables support for load balancing TCP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_UDP
bool "UDP load balancing support"
- ---help---
+ help
This option enables support for load balancing UDP transport
protocol. Say Y if unsure.
@@ -93,20 +93,20 @@ config IP_VS_PROTO_AH_ESP
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
- ---help---
+ help
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
- ---help---
+ help
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
config IP_VS_PROTO_SCTP
bool "SCTP load balancing support"
select LIBCRC32C
- ---help---
+ help
This option enables support for load balancing SCTP transport
protocol. Say Y if unsure.
@@ -114,7 +114,7 @@ comment "IPVS scheduler"
config IP_VS_RR
tristate "round-robin scheduling"
- ---help---
+ help
The robin-robin scheduling algorithm simply directs network
connections to different real servers in a round-robin manner.
@@ -123,7 +123,7 @@ config IP_VS_RR
config IP_VS_WRR
tristate "weighted round-robin scheduling"
- ---help---
+ help
The weighted robin-robin scheduling algorithm directs network
connections to different real servers based on server weights
in a round-robin manner. Servers with higher weights receive
@@ -136,7 +136,7 @@ config IP_VS_WRR
config IP_VS_LC
tristate "least-connection scheduling"
- ---help---
+ help
The least-connection scheduling algorithm directs network
connections to the server with the least number of active
connections.
@@ -146,7 +146,7 @@ config IP_VS_LC
config IP_VS_WLC
tristate "weighted least-connection scheduling"
- ---help---
+ help
The weighted least-connection scheduling algorithm directs network
connections to the server with the least active connections
normalized by the server weight.
@@ -156,7 +156,7 @@ config IP_VS_WLC
config IP_VS_FO
tristate "weighted failover scheduling"
- ---help---
+ help
The weighted failover scheduling algorithm directs network
connections to the server with the highest weight that is
currently available.
@@ -166,7 +166,7 @@ config IP_VS_FO
config IP_VS_OVF
tristate "weighted overflow scheduling"
- ---help---
+ help
The weighted overflow scheduling algorithm directs network
connections to the server with the highest weight that is
currently available and overflows to the next when active
@@ -177,7 +177,7 @@ config IP_VS_OVF
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
- ---help---
+ help
The locality-based least-connection scheduling algorithm is for
destination IP load balancing. It is usually used in cache cluster.
This algorithm usually directs packet destined for an IP address to
@@ -191,7 +191,7 @@ config IP_VS_LBLC
config IP_VS_LBLCR
tristate "locality-based least-connection with replication scheduling"
- ---help---
+ help
The locality-based least-connection with replication scheduling
algorithm is also for destination IP load balancing. It is
usually used in cache cluster. It differs from the LBLC scheduling
@@ -209,7 +209,7 @@ config IP_VS_LBLCR
config IP_VS_DH
tristate "destination hashing scheduling"
- ---help---
+ help
The destination hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
hash table by their destination IP addresses.
@@ -219,7 +219,7 @@ config IP_VS_DH
config IP_VS_SH
tristate "source hashing scheduling"
- ---help---
+ help
The source hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
hash table by their source IP addresses.
@@ -229,7 +229,7 @@ config IP_VS_SH
config IP_VS_MH
tristate "maglev hashing scheduling"
- ---help---
+ help
The maglev consistent hashing scheduling algorithm provides the
Google's Maglev hashing algorithm as a IPVS scheduler. It assigns
network connections to the servers through looking up a statically
@@ -248,7 +248,7 @@ config IP_VS_MH
config IP_VS_SED
tristate "shortest expected delay scheduling"
- ---help---
+ help
The shortest expected delay scheduling algorithm assigns network
connections to the server with the shortest expected delay. The
expected delay that the job will experience is (Ci + 1) / Ui if
@@ -261,7 +261,7 @@ config IP_VS_SED
config IP_VS_NQ
tristate "never queue scheduling"
- ---help---
+ help
The never queue scheduling algorithm adopts a two-speed model.
When there is an idle server available, the job will be sent to
the idle server, instead of waiting for a fast one. When there
@@ -278,7 +278,7 @@ config IP_VS_SH_TAB_BITS
int "IPVS source hashing table size (the Nth power of 2)"
range 4 20
default 8
- ---help---
+ help
The source hashing scheduler maps source IPs to destinations
stored in a hash table. This table is tiled by each destination
until all slots in the table are filled. When using weights to
@@ -293,7 +293,7 @@ config IP_VS_MH_TAB_INDEX
int "IPVS maglev hashing table index of size (the prime numbers)"
range 8 17
default 12
- ---help---
+ help
The maglev hashing scheduler maps source IPs to destinations
stored in a hash table. This table is assigned by a preference
list of the positions to each destination until all slots in
@@ -312,7 +312,7 @@ config IP_VS_FTP
depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \
NF_CONNTRACK_FTP
select IP_VS_NFCT
- ---help---
+ help
FTP is a protocol that transfers IP address and/or port number in
the payload. In the virtual server via Network Address Translation,
the IP address and port number of real servers cannot be sent to
@@ -326,7 +326,7 @@ config IP_VS_FTP
config IP_VS_NFCT
bool "Netfilter connection tracking"
depends on NF_CONNTRACK
- ---help---
+ help
The Netfilter connection tracking support allows the IPVS
connection state to be exported to the Netfilter framework
for filtering purposes.
@@ -335,7 +335,7 @@ config IP_VS_PE_SIP
tristate "SIP persistence engine"
depends on IP_VS_PROTO_UDP
depends on NF_CONNTRACK_SIP
- ---help---
+ help
Allow persistence based on the SIP Call-ID
endif # IP_VS
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 605e0f68f8bd..2b8abbfe018c 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1717,6 +1717,8 @@ static int sync_thread_backup(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
struct netns_ipvs *ipvs = tinfo->ipvs;
+ struct sock *sk = tinfo->sock->sk;
+ struct udp_sock *up = udp_sk(sk);
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
@@ -1724,12 +1726,14 @@ static int sync_thread_backup(void *data)
ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);
while (!kthread_should_stop()) {
- wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
- !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
- || kthread_should_stop());
+ wait_event_interruptible(*sk_sleep(sk),
+ !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+ !skb_queue_empty_lockless(&up->reader_queue) ||
+ kthread_should_stop());
/* do we have data now? */
- while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+ while (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+ !skb_queue_empty_lockless(&up->reader_queue)) {
len = ip_vs_receive(tinfo->sock, tinfo->buf,
ipvs->bcfg.sync_maxlen);
if (len <= 0) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 79cd9dde457b..f33d72c5b06e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2158,6 +2158,8 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
err = __nf_conntrack_update(net, skb, ct, ctinfo);
if (err < 0)
return err;
+
+ ct = nf_ct_get(skb, &ctinfo);
}
return nf_confirm_cthelper(skb, ct, ctinfo);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 9eca90414bb7..b22801f97bce 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -382,7 +382,7 @@ static int help(struct sk_buff *skb,
int ret;
u32 seq;
int dir = CTINFO2DIR(ctinfo);
- unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff);
+ unsigned int matchlen, matchoff;
struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct);
struct nf_conntrack_expect *exp;
union nf_inet_addr *daddr;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d7bd8b1f27d5..832eabecfbdd 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -939,7 +939,8 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
filter->mark.mask = 0xffffffff;
}
} else if (cda[CTA_MARK_MASK]) {
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto err_filter;
}
#endif
if (!cda[CTA_FILTER])
@@ -947,15 +948,17 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
if (err < 0)
- return ERR_PTR(err);
+ goto err_filter;
err = ctnetlink_parse_filter(cda[CTA_FILTER], filter);
if (err < 0)
- return ERR_PTR(err);
+ goto err_filter;
if (filter->orig_flags) {
- if (!cda[CTA_TUPLE_ORIG])
- return ERR_PTR(-EINVAL);
+ if (!cda[CTA_TUPLE_ORIG]) {
+ err = -EINVAL;
+ goto err_filter;
+ }
err = ctnetlink_parse_tuple_filter(cda, &filter->orig,
CTA_TUPLE_ORIG,
@@ -963,23 +966,32 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
&filter->zone,
filter->orig_flags);
if (err < 0)
- return ERR_PTR(err);
+ goto err_filter;
}
if (filter->reply_flags) {
- if (!cda[CTA_TUPLE_REPLY])
- return ERR_PTR(-EINVAL);
+ if (!cda[CTA_TUPLE_REPLY]) {
+ err = -EINVAL;
+ goto err_filter;
+ }
err = ctnetlink_parse_tuple_filter(cda, &filter->reply,
CTA_TUPLE_REPLY,
filter->family,
&filter->zone,
filter->orig_flags);
- if (err < 0)
- return ERR_PTR(err);
+ if (err < 0) {
+ err = -EINVAL;
+ goto err_filter;
+ }
}
return filter;
+
+err_filter:
+ kfree(filter);
+
+ return ERR_PTR(err);
}
static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index f108a76925dd..2b01a151eaa8 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -73,3 +73,4 @@ EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter packet duplication support");
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 6a3034f84ab6..b1eb5272b379 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -387,51 +387,6 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
-int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
- flow_setup_cb_t *cb, void *cb_priv)
-{
- struct flow_block *block = &flow_table->flow_block;
- struct flow_block_cb *block_cb;
- int err = 0;
-
- down_write(&flow_table->flow_block_lock);
- block_cb = flow_block_cb_lookup(block, cb, cb_priv);
- if (block_cb) {
- err = -EEXIST;
- goto unlock;
- }
-
- block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
- if (IS_ERR(block_cb)) {
- err = PTR_ERR(block_cb);
- goto unlock;
- }
-
- list_add_tail(&block_cb->list, &block->cb_list);
-
-unlock:
- up_write(&flow_table->flow_block_lock);
- return err;
-}
-EXPORT_SYMBOL_GPL(nf_flow_table_offload_add_cb);
-
-void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
- flow_setup_cb_t *cb, void *cb_priv)
-{
- struct flow_block *block = &flow_table->flow_block;
- struct flow_block_cb *block_cb;
-
- down_write(&flow_table->flow_block_lock);
- block_cb = flow_block_cb_lookup(block, cb, cb_priv);
- if (block_cb) {
- list_del(&block_cb->list);
- flow_block_cb_free(block_cb);
- } else {
- WARN_ON(true);
- }
- up_write(&flow_table->flow_block_lock);
-}
-EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb);
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
__be16 port, __be16 new_port)
@@ -639,3 +594,4 @@ module_exit(nf_flow_table_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter flow table module");
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 88bedf1ff1ae..bc4126d8ef65 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -72,3 +72,4 @@ module_exit(nf_flow_inet_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
+MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module");
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 62651e6683f6..5fff1e040168 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -950,6 +950,7 @@ static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
nf_flow_table_gc_cleanup(flowtable, dev);
down_write(&flowtable->flow_block_lock);
list_del(&block_cb->list);
+ list_del(&block_cb->driver_list);
flow_block_cb_free(block_cb);
up_write(&flowtable->flow_block_lock);
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b9cbe1e2453e..ebcdc8e54476 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -1237,3 +1237,4 @@ EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("nftables SYNPROXY expression support");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 073aa1051d43..2b3862ea0505 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -12,6 +12,7 @@
#include <linux/netlink.h>
#include <linux/vmalloc.h>
#include <linux/rhashtable.h>
+#include <linux/audit.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
@@ -188,24 +189,6 @@ static void nft_netdev_unregister_hooks(struct net *net,
nf_unregister_net_hook(net, &hook->ops);
}
-static int nft_register_basechain_hooks(struct net *net, int family,
- struct nft_base_chain *basechain)
-{
- if (family == NFPROTO_NETDEV)
- return nft_netdev_register_hooks(net, &basechain->hook_list);
-
- return nf_register_net_hook(net, &basechain->ops);
-}
-
-static void nft_unregister_basechain_hooks(struct net *net, int family,
- struct nft_base_chain *basechain)
-{
- if (family == NFPROTO_NETDEV)
- nft_netdev_unregister_hooks(net, &basechain->hook_list);
- else
- nf_unregister_net_hook(net, &basechain->ops);
-}
-
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
@@ -223,7 +206,10 @@ static int nf_tables_register_hook(struct net *net,
if (basechain->type->ops_register)
return basechain->type->ops_register(net, ops);
- return nft_register_basechain_hooks(net, table->family, basechain);
+ if (table->family == NFPROTO_NETDEV)
+ return nft_netdev_register_hooks(net, &basechain->hook_list);
+
+ return nf_register_net_hook(net, &basechain->ops);
}
static void nf_tables_unregister_hook(struct net *net,
@@ -242,7 +228,10 @@ static void nf_tables_unregister_hook(struct net *net,
if (basechain->type->ops_unregister)
return basechain->type->ops_unregister(net, ops);
- nft_unregister_basechain_hooks(net, table->family, basechain);
+ if (table->family == NFPROTO_NETDEV)
+ nft_netdev_unregister_hooks(net, &basechain->hook_list);
+ else
+ nf_unregister_net_hook(net, &basechain->ops);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -693,6 +682,17 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0",
+ ctx->table->name, ctx->table->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ ctx->table->use,
+ event == NFT_MSG_NEWTABLE ?
+ AUDIT_NFT_OP_TABLE_REGISTER :
+ AUDIT_NFT_OP_TABLE_UNREGISTER,
+ GFP_KERNEL);
+ kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -832,8 +832,7 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
if (cnt && i++ == cnt)
break;
- nft_unregister_basechain_hooks(net, table->family,
- nft_base_chain(chain));
+ nf_tables_unregister_hook(net, table, chain);
}
}
@@ -848,8 +847,7 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table)
if (!nft_is_base_chain(chain))
continue;
- err = nft_register_basechain_hooks(net, table->family,
- nft_base_chain(chain));
+ err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err_register_hooks;
@@ -894,11 +892,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
nft_trans_table_enable(trans) = false;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
ret = nf_tables_table_enable(ctx->net, ctx->table);
- if (ret >= 0) {
- ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ if (ret >= 0)
nft_trans_table_enable(trans) = true;
- }
+ else
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
}
if (ret < 0)
goto err;
@@ -1428,6 +1427,18 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
+ ctx->table->name, ctx->table->handle,
+ ctx->chain->name, ctx->chain->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ ctx->chain->use,
+ event == NFT_MSG_NEWCHAIN ?
+ AUDIT_NFT_OP_CHAIN_REGISTER :
+ AUDIT_NFT_OP_CHAIN_UNREGISTER,
+ GFP_KERNEL);
+ kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -2693,6 +2704,18 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
+ ctx->table->name, ctx->table->handle,
+ ctx->chain->name, ctx->chain->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ rule->handle,
+ event == NFT_MSG_NEWRULE ?
+ AUDIT_NFT_OP_RULE_REGISTER :
+ AUDIT_NFT_OP_RULE_UNREGISTER,
+ GFP_KERNEL);
+ kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -3695,6 +3718,18 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
struct sk_buff *skb;
u32 portid = ctx->portid;
int err;
+ char *buf = kasprintf(gfp_flags, "%s:%llu;%s:%llu",
+ ctx->table->name, ctx->table->handle,
+ set->name, set->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ set->field_count,
+ event == NFT_MSG_NEWSET ?
+ AUDIT_NFT_OP_SET_REGISTER :
+ AUDIT_NFT_OP_SET_UNREGISTER,
+ gfp_flags);
+ kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -4811,6 +4846,18 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
u32 portid = ctx->portid;
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
+ ctx->table->name, ctx->table->handle,
+ set->name, set->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ set->handle,
+ event == NFT_MSG_NEWSETELEM ?
+ AUDIT_NFT_OP_SETELEM_REGISTER :
+ AUDIT_NFT_OP_SETELEM_UNREGISTER,
+ GFP_KERNEL);
+ kfree(buf);
if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
return;
@@ -5892,6 +5939,20 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
obj->ops->type->type != filter->type)
goto cont;
+ if (reset) {
+ char *buf = kasprintf(GFP_ATOMIC,
+ "%s:%llu;?:0",
+ table->name,
+ table->handle);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ AUDIT_NFT_OP_OBJ_RESET,
+ GFP_ATOMIC);
+ kfree(buf);
+ }
+
if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWOBJ,
@@ -6002,6 +6063,18 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
reset = true;
+ if (reset) {
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%llu;?:0",
+ table->name, table->handle);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ AUDIT_NFT_OP_OBJ_RESET,
+ GFP_ATOMIC);
+ kfree(buf);
+ }
+
err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
family, table, obj, reset);
@@ -6077,6 +6150,17 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(gfp, "%s:%llu;?:0",
+ table->name, table->handle);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ event == NFT_MSG_NEWOBJ ?
+ AUDIT_NFT_OP_OBJ_REGISTER :
+ AUDIT_NFT_OP_OBJ_UNREGISTER,
+ gfp);
+ kfree(buf);
if (!report &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
@@ -6550,12 +6634,22 @@ err1:
return err;
}
+static void nft_flowtable_hook_release(struct nft_flowtable_hook *flowtable_hook)
+{
+ struct nft_hook *this, *next;
+
+ list_for_each_entry_safe(this, next, &flowtable_hook->list, list) {
+ list_del(&this->list);
+ kfree(this);
+ }
+}
+
static int nft_delflowtable_hook(struct nft_ctx *ctx,
struct nft_flowtable *flowtable)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_flowtable_hook flowtable_hook;
- struct nft_hook *this, *next, *hook;
+ struct nft_hook *this, *hook;
struct nft_trans *trans;
int err;
@@ -6564,33 +6658,40 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
if (err < 0)
return err;
- list_for_each_entry_safe(this, next, &flowtable_hook.list, list) {
+ list_for_each_entry(this, &flowtable_hook.list, list) {
hook = nft_hook_list_find(&flowtable->hook_list, this);
if (!hook) {
err = -ENOENT;
goto err_flowtable_del_hook;
}
hook->inactive = true;
- list_del(&this->list);
- kfree(this);
}
trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
sizeof(struct nft_trans_flowtable));
- if (!trans)
- return -ENOMEM;
+ if (!trans) {
+ err = -ENOMEM;
+ goto err_flowtable_del_hook;
+ }
nft_trans_flowtable(trans) = flowtable;
nft_trans_flowtable_update(trans) = true;
INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ nft_flowtable_hook_release(&flowtable_hook);
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
err_flowtable_del_hook:
- list_for_each_entry(hook, &flowtable_hook.list, list)
+ list_for_each_entry(this, &flowtable_hook.list, list) {
+ hook = nft_hook_list_find(&flowtable->hook_list, this);
+ if (!hook)
+ break;
+
hook->inactive = false;
+ }
+ nft_flowtable_hook_release(&flowtable_hook);
return err;
}
@@ -6856,6 +6957,18 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
+ flowtable->table->name, flowtable->table->handle,
+ flowtable->name, flowtable->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ flowtable->hooknum,
+ event == NFT_MSG_NEWFLOWTABLE ?
+ AUDIT_NFT_OP_FLOWTABLE_REGISTER :
+ AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+ GFP_KERNEL);
+ kfree(buf);
if (ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -6977,6 +7090,9 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
struct sk_buff *skb2;
int err;
+ audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq,
+ AUDIT_NFT_OP_GEN_REGISTER, GFP_KERNEL);
+
if (nlmsg_report(nlh) &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
return;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 185fc82c99aa..c7cf1cde46de 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -296,6 +296,7 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND,
basechain, &extack);
mutex_lock(&net->nft.commit_mutex);
+ list_del(&block_cb->driver_list);
list_move(&block_cb->list, &bo.cb_list);
nft_flow_offload_unbind(&bo, basechain);
mutex_unlock(&net->nft.commit_mutex);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 99127e2d95a8..5f24edf95830 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -33,6 +33,7 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
+MODULE_DESCRIPTION("Netfilter messages via netlink socket");
#define nfnl_dereference_protected(id) \
rcu_dereference_protected(table[(id)].subsys, \
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 0ba020ca38e6..f02992419850 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -689,7 +689,7 @@ nfulnl_log_packet(struct net *net,
struct nfnl_log_net *log = nfnl_log_pernet(net);
const struct nfnl_ct_hook *nfnl_ct = NULL;
struct nf_conn *ct = NULL;
- enum ip_conntrack_info uninitialized_var(ctinfo);
+ enum ip_conntrack_info ctinfo;
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3243a31f6e82..dadfc06245a3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -388,7 +388,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct net_device *indev;
struct net_device *outdev;
struct nf_conn *ct = NULL;
- enum ip_conntrack_info uninitialized_var(ctinfo);
+ enum ip_conntrack_info ctinfo;
struct nfnl_ct_hook *nfnl_ct;
bool csum_verify;
char *secdata = NULL;
@@ -1168,7 +1168,7 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
struct nfqnl_instance *queue;
unsigned int verdict;
struct nf_queue_entry *entry;
- enum ip_conntrack_info uninitialized_var(ctinfo);
+ enum ip_conntrack_info ctinfo;
struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f9adca62ccb3..aa1a066cb74b 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -902,3 +902,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("match");
MODULE_ALIAS_NFT_EXPR("target");
+MODULE_DESCRIPTION("x_tables over nftables support");
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 69d6173f91e2..7d0761fad37e 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -280,3 +280,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso");
MODULE_ALIAS_NFT_EXPR("connlimit");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT);
+MODULE_DESCRIPTION("nftables connlimit rule support");
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f6d4d0fa23a6..85ed461ec24e 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -303,3 +303,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("counter");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER);
+MODULE_DESCRIPTION("nftables counter rule support");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index faea72c2df32..77258af1fce0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1345,3 +1345,4 @@ MODULE_ALIAS_NFT_EXPR("notrack");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_EXPECT);
+MODULE_DESCRIPTION("Netfilter nf_tables conntrack module");
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index c2e78c160fd7..40788b3f1071 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -102,3 +102,4 @@ module_exit(nft_dup_netdev_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(5, "dup");
+MODULE_DESCRIPTION("nftables netdev packet duplication support");
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
index 465432e0531b..a88d44e163d1 100644
--- a/net/netfilter/nft_fib_inet.c
+++ b/net/netfilter/nft_fib_inet.c
@@ -76,3 +76,4 @@ module_exit(nft_fib_inet_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
+MODULE_DESCRIPTION("nftables fib inet support");
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
index a2e726ae7f07..3f3478abd845 100644
--- a/net/netfilter/nft_fib_netdev.c
+++ b/net/netfilter/nft_fib_netdev.c
@@ -85,3 +85,4 @@ module_exit(nft_fib_netdev_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
+MODULE_DESCRIPTION("nftables netdev fib lookups support");
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index b70b48996801..3b9b97aa4b32 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -286,3 +286,4 @@ module_exit(nft_flow_offload_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("flow_offload");
+MODULE_DESCRIPTION("nftables hardware flow offload module");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index b836d550b919..96371d878e7e 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -248,3 +248,4 @@ module_exit(nft_hash_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
MODULE_ALIAS_NFT_EXPR("hash");
+MODULE_DESCRIPTION("Netfilter nftables hash module");
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 35b67d7e3694..0e2c315c3b5e 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -372,3 +372,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("limit");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
+MODULE_DESCRIPTION("nftables limit expression support");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index fe4831f2258f..57899454a530 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -298,3 +298,4 @@ module_exit(nft_log_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("log");
+MODULE_DESCRIPTION("Netfilter nf_tables log module");
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index bc9fd98c5d6d..71390b727040 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -305,3 +305,4 @@ module_exit(nft_masq_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_EXPR("masq");
+MODULE_DESCRIPTION("Netfilter nftables masquerade expression support");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 23a7bfd10521..4bcf33b049c4 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -402,3 +402,4 @@ module_exit(nft_nat_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
MODULE_ALIAS_NFT_EXPR("nat");
+MODULE_DESCRIPTION("Network Address Translation support");
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 48edb9d5f012..f1fc824f9737 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -217,3 +217,4 @@ module_exit(nft_ng_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
MODULE_ALIAS_NFT_EXPR("numgen");
+MODULE_DESCRIPTION("nftables number generator module");
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index bfd18d2b65a2..5f9207a9f485 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -252,3 +252,4 @@ module_exit(nft_objref_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("objref");
+MODULE_DESCRIPTION("nftables stateful object reference module");
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index b42247aa48a9..c261d57a666a 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -149,3 +149,4 @@ module_exit(nft_osf_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
MODULE_ALIAS_NFT_EXPR("osf");
+MODULE_DESCRIPTION("nftables passive OS fingerprint support");
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 5ece0a6aa8c3..23265d757acb 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -216,3 +216,4 @@ module_exit(nft_queue_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Leblond <eric@regit.org>");
MODULE_ALIAS_NFT_EXPR("queue");
+MODULE_DESCRIPTION("Netfilter nftables queue module");
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 4413690591f2..0363f533a42b 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -254,3 +254,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("quota");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA);
+MODULE_DESCRIPTION("Netfilter nftables quota module");
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 5b779171565c..2056051c0af0 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -292,3 +292,4 @@ module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_EXPR("redir");
+MODULE_DESCRIPTION("Netfilter nftables redirect support");
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 00f865fb80ca..86eafbb0fdd0 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -119,3 +119,4 @@ EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Netfilter x_tables over nftables module");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index f41f414b72d1..cf8f2646e93c 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -149,3 +149,4 @@ module_exit(nft_reject_inet_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_AF_EXPR(1, "reject");
+MODULE_DESCRIPTION("Netfilter nftables reject inet support");
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 8b5acc6910fd..8c04388296b0 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1242,7 +1242,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
- if (!*this_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) {
+ if (!*get_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) {
+ put_cpu_ptr(m->scratch);
+
err = pipapo_realloc_scratch(m, bsize_max);
if (err)
return err;
@@ -1250,6 +1252,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
this_cpu_write(nft_pipapo_scratch_index, false);
m->bsize_max = bsize_max;
+ } else {
+ put_cpu_ptr(m->scratch);
}
*ext2 = &e->ext;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 62f416bc0579..b6aad3fc46c3 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -271,12 +271,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
if (nft_rbtree_interval_start(new)) {
if (nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext, genmask))
+ nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext))
overlap = false;
} else {
overlap = nft_rbtree_interval_end(rbe) &&
nft_set_elem_active(&rbe->ext,
- genmask);
+ genmask) &&
+ !nft_set_elem_expired(&rbe->ext);
}
} else if (d > 0) {
p = &parent->rb_right;
@@ -284,9 +286,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
if (nft_rbtree_interval_end(new)) {
overlap = nft_rbtree_interval_end(rbe) &&
nft_set_elem_active(&rbe->ext,
- genmask);
+ genmask) &&
+ !nft_set_elem_expired(&rbe->ext);
} else if (nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext, genmask)) {
+ nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext)) {
overlap = true;
}
} else {
@@ -294,15 +298,18 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_start(new)) {
p = &parent->rb_left;
- if (nft_set_elem_active(&rbe->ext, genmask))
+ if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext))
overlap = false;
} else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(new)) {
p = &parent->rb_right;
- if (nft_set_elem_active(&rbe->ext, genmask))
+ if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext))
overlap = false;
- } else if (nft_set_elem_active(&rbe->ext, genmask)) {
+ } else if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext)) {
*ext = &rbe->ext;
return -EEXIST;
} else {
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index e2c1fc608841..4fda8b3f1762 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -388,3 +388,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
MODULE_ALIAS_NFT_EXPR("synproxy");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY);
+MODULE_DESCRIPTION("nftables SYNPROXY expression support");
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 30be5787fbde..d3eb953d0333 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -719,3 +719,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("tunnel");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_TUNNEL);
+MODULE_DESCRIPTION("nftables tunnel expression support");
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 99a468be4a59..9ad8f3ff66f5 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1410,7 +1410,8 @@ xt_replace_table(struct xt_table *table,
audit_log_nfcfg(table->name, table->af, private->number,
!private->number ? AUDIT_XT_OP_REGISTER :
- AUDIT_XT_OP_REPLACE);
+ AUDIT_XT_OP_REPLACE,
+ GFP_KERNEL);
return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1473,7 +1474,7 @@ void *xt_unregister_table(struct xt_table *table)
list_del(&table->list);
mutex_unlock(&xt[table->af].mutex);
audit_log_nfcfg(table->name, table->af, private->number,
- AUDIT_XT_OP_UNREGISTER);
+ AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
kfree(table);
return private;
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index a8e5f6c8db7a..b4f7bbc3f3ca 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -244,3 +244,4 @@ MODULE_ALIAS("ipt_SNAT");
MODULE_ALIAS("ipt_DNAT");
MODULE_ALIAS("ip6t_SNAT");
MODULE_ALIAS("ip6t_DNAT");
+MODULE_DESCRIPTION("SNAT and DNAT targets support");
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
index 07b03c306f28..4383ac29693e 100644
--- a/net/netlabel/Kconfig
+++ b/net/netlabel/Kconfig
@@ -8,7 +8,7 @@ config NETLABEL
depends on SECURITY
select CRC_CCITT if IPV6
default n
- ---help---
+ help
NetLabel provides support for explicit network packet labeling
protocols such as CIPSO and RIPSO. For more information see
Documentation/netlabel as well as the NetLabel SourceForge project
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
index 20f967974da0..1039d4f2ce11 100644
--- a/net/netlink/Kconfig
+++ b/net/netlink/Kconfig
@@ -6,6 +6,6 @@
config NETLINK_DIAG
tristate "NETLINK: socket monitoring interface"
default n
- ---help---
+ help
Support for NETLINK socket monitoring interface used by the ss tool.
If unsure, say Y.
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 6c19b91bbb86..9395ee8a868d 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -351,22 +351,11 @@ int genl_register_family(struct genl_family *family)
start = end = GENL_ID_VFS_DQUOT;
}
- if (family->maxattr && !family->parallel_ops) {
- family->attrbuf = kmalloc_array(family->maxattr + 1,
- sizeof(struct nlattr *),
- GFP_KERNEL);
- if (family->attrbuf == NULL) {
- err = -ENOMEM;
- goto errout_locked;
- }
- } else
- family->attrbuf = NULL;
-
family->id = idr_alloc_cyclic(&genl_fam_idr, family,
start, end + 1, GFP_KERNEL);
if (family->id < 0) {
err = family->id;
- goto errout_free;
+ goto errout_locked;
}
err = genl_validate_assign_mc_groups(family);
@@ -385,8 +374,6 @@ int genl_register_family(struct genl_family *family)
errout_remove:
idr_remove(&genl_fam_idr, family->id);
-errout_free:
- kfree(family->attrbuf);
errout_locked:
genl_unlock_all();
return err;
@@ -419,8 +406,6 @@ int genl_unregister_family(const struct genl_family *family)
atomic_read(&genl_sk_destructing_cnt) == 0);
genl_unlock();
- kfree(family->attrbuf);
-
genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
return 0;
@@ -474,8 +459,7 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
struct netlink_ext_ack *extack,
const struct genl_ops *ops,
int hdrlen,
- enum genl_validate_flags no_strict_flag,
- bool parallel)
+ enum genl_validate_flags no_strict_flag)
{
enum netlink_validation validate = ops->validate & no_strict_flag ?
NL_VALIDATE_LIBERAL :
@@ -486,31 +470,23 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
if (!family->maxattr)
return NULL;
- if (parallel) {
- attrbuf = kmalloc_array(family->maxattr + 1,
- sizeof(struct nlattr *), GFP_KERNEL);
- if (!attrbuf)
- return ERR_PTR(-ENOMEM);
- } else {
- attrbuf = family->attrbuf;
- }
+ attrbuf = kmalloc_array(family->maxattr + 1,
+ sizeof(struct nlattr *), GFP_KERNEL);
+ if (!attrbuf)
+ return ERR_PTR(-ENOMEM);
err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
family->policy, validate, extack);
if (err) {
- if (parallel)
- kfree(attrbuf);
+ kfree(attrbuf);
return ERR_PTR(err);
}
return attrbuf;
}
-static void genl_family_rcv_msg_attrs_free(const struct genl_family *family,
- struct nlattr **attrbuf,
- bool parallel)
+static void genl_family_rcv_msg_attrs_free(struct nlattr **attrbuf)
{
- if (parallel)
- kfree(attrbuf);
+ kfree(attrbuf);
}
struct genl_start_context {
@@ -537,15 +513,14 @@ static int genl_start(struct netlink_callback *cb)
attrs = genl_family_rcv_msg_attrs_parse(ctx->family, ctx->nlh, ctx->extack,
ops, ctx->hdrlen,
- GENL_DONT_VALIDATE_DUMP_STRICT,
- true);
+ GENL_DONT_VALIDATE_DUMP_STRICT);
if (IS_ERR(attrs))
return PTR_ERR(attrs);
no_attrs:
info = genl_dumpit_info_alloc();
if (!info) {
- kfree(attrs);
+ genl_family_rcv_msg_attrs_free(attrs);
return -ENOMEM;
}
info->family = ctx->family;
@@ -562,7 +537,7 @@ no_attrs:
}
if (rc) {
- kfree(attrs);
+ genl_family_rcv_msg_attrs_free(info->attrs);
genl_dumpit_info_free(info);
cb->data = NULL;
}
@@ -591,7 +566,7 @@ static int genl_lock_done(struct netlink_callback *cb)
rc = ops->done(cb);
genl_unlock();
}
- genl_family_rcv_msg_attrs_free(info->family, info->attrs, false);
+ genl_family_rcv_msg_attrs_free(info->attrs);
genl_dumpit_info_free(info);
return rc;
}
@@ -604,7 +579,7 @@ static int genl_parallel_done(struct netlink_callback *cb)
if (ops->done)
rc = ops->done(cb);
- genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+ genl_family_rcv_msg_attrs_free(info->attrs);
genl_dumpit_info_free(info);
return rc;
}
@@ -671,8 +646,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
ops, hdrlen,
- GENL_DONT_VALIDATE_STRICT,
- family->parallel_ops);
+ GENL_DONT_VALIDATE_STRICT);
if (IS_ERR(attrbuf))
return PTR_ERR(attrbuf);
@@ -698,7 +672,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
family->post_doit(ops, skb, &info);
out:
- genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops);
+ genl_family_rcv_msg_attrs_free(attrbuf);
return err;
}
@@ -1170,60 +1144,11 @@ static struct genl_family genl_ctrl __ro_after_init = {
.netnsok = true,
};
-static int genl_bind(struct net *net, int group)
-{
- struct genl_family *f;
- int err = -ENOENT;
- unsigned int id;
-
- down_read(&cb_lock);
-
- idr_for_each_entry(&genl_fam_idr, f, id) {
- if (group >= f->mcgrp_offset &&
- group < f->mcgrp_offset + f->n_mcgrps) {
- int fam_grp = group - f->mcgrp_offset;
-
- if (!f->netnsok && net != &init_net)
- err = -ENOENT;
- else if (f->mcast_bind)
- err = f->mcast_bind(net, fam_grp);
- else
- err = 0;
- break;
- }
- }
- up_read(&cb_lock);
-
- return err;
-}
-
-static void genl_unbind(struct net *net, int group)
-{
- struct genl_family *f;
- unsigned int id;
-
- down_read(&cb_lock);
-
- idr_for_each_entry(&genl_fam_idr, f, id) {
- if (group >= f->mcgrp_offset &&
- group < f->mcgrp_offset + f->n_mcgrps) {
- int fam_grp = group - f->mcgrp_offset;
-
- if (f->mcast_unbind)
- f->mcast_unbind(net, fam_grp);
- break;
- }
- }
- up_read(&cb_lock);
-}
-
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
- .bind = genl_bind,
- .unbind = genl_unbind,
};
/* we'll bump the group number right afterwards */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index eccc7d366e17..f90ef6934b8f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -70,6 +70,7 @@ static const struct proto_ops nr_proto_ops;
* separate class since they always nest.
*/
static struct lock_class_key nr_netdev_xmit_lock_key;
+static struct lock_class_key nr_netdev_addr_lock_key;
static void nr_set_lockdep_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -80,6 +81,7 @@ static void nr_set_lockdep_one(struct net_device *dev,
static void nr_set_lockdep_key(struct net_device *dev)
{
+ lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
}
diff --git a/net/nfc/hci/Kconfig b/net/nfc/hci/Kconfig
index 4822d6f46947..9500b8a27475 100644
--- a/net/nfc/hci/Kconfig
+++ b/net/nfc/hci/Kconfig
@@ -13,6 +13,6 @@ config NFC_SHDLC
select CRC_CCITT
bool "SHDLC link layer for HCI based NFC drivers"
default n
- ---help---
+ help
Say yes if you use an NFC HCI driver that requires SHDLC link layer.
If unsure, say N here.
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 7cd524884304..78ea8c94dcba 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1228,10 +1228,13 @@ int nci_register_device(struct nci_dev *ndev)
rc = nfc_register_device(ndev->nfc_dev);
if (rc)
- goto destroy_rx_wq_exit;
+ goto destroy_tx_wq_exit;
goto exit;
+destroy_tx_wq_exit:
+ destroy_workqueue(ndev->tx_wq);
+
destroy_rx_wq_exit:
destroy_workqueue(ndev->rx_wq);
diff --git a/net/nsh/Kconfig b/net/nsh/Kconfig
index 19af948ab6f0..84f2a2823417 100644
--- a/net/nsh/Kconfig
+++ b/net/nsh/Kconfig
@@ -2,7 +2,7 @@
menuconfig NET_NSH
tristate "Network Service Header (NSH) protocol"
default n
- ---help---
+ help
Network Service Header is an implementation of Service Function
Chaining (RFC 7665). The current implementation in Linux supports
only MD type 1 and only with the openvswitch module.
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 22d7d5604b4c..15bd287f5cbd 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -15,7 +15,7 @@ config OPENVSWITCH
select NET_MPLS_GSO
select DST_CACHE
select NET_NSH
- ---help---
+ help
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
expected in a traditional hardware switch, it enables fine-grained
@@ -43,7 +43,7 @@ config OPENVSWITCH_GRE
depends on OPENVSWITCH
depends on NET_IPGRE
default OPENVSWITCH
- ---help---
+ help
If you say Y here, then the Open vSwitch will be able create GRE
vport.
@@ -56,7 +56,7 @@ config OPENVSWITCH_VXLAN
depends on OPENVSWITCH
depends on VXLAN
default OPENVSWITCH
- ---help---
+ help
If you say Y here, then the Open vSwitch will be able create vxlan vport.
Say N to exclude this support and reduce the binary size.
@@ -68,7 +68,7 @@ config OPENVSWITCH_GENEVE
depends on OPENVSWITCH
depends on GENEVE
default OPENVSWITCH
- ---help---
+ help
If you say Y here, then the Open vSwitch will be able create geneve vport.
Say N to exclude this support and reduce the binary size.
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index fc0efd8833c8..2611657f40ca 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1169,9 +1169,10 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
const struct nlattr *attr, bool last)
{
+ struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
const struct nlattr *actions, *cpl_arg;
+ int len, max_len, rem = nla_len(attr);
const struct check_pkt_len_arg *arg;
- int rem = nla_len(attr);
bool clone_flow_key;
/* The first netlink attribute in 'attr' is always
@@ -1180,7 +1181,11 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
cpl_arg = nla_data(attr);
arg = nla_data(cpl_arg);
- if (skb->len <= arg->pkt_len) {
+ len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
+ max_len = arg->pkt_len;
+
+ if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
+ len <= max_len) {
/* Second netlink attribute in 'attr' is always
* 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
*/
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
index b4abad135294..2997382d597c 100644
--- a/net/packet/Kconfig
+++ b/net/packet/Kconfig
@@ -5,7 +5,7 @@
config PACKET
tristate "Packet socket"
- ---help---
+ help
The Packet protocol is used by applications which communicate
directly with network devices without an intermediate network
protocol implemented in the kernel, e.g. tcpdump. If you want them
@@ -20,6 +20,6 @@ config PACKET_DIAG
tristate "Packet: sockets monitoring interface"
depends on PACKET
default n
- ---help---
+ help
Support for PF_PACKET sockets monitoring interface used by the ss tool.
If unsure, say Y.
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index f362ca316015..b4020b84760f 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -4,7 +4,7 @@
config QRTR
tristate "Qualcomm IPC Router support"
- ---help---
+ help
Say Y if you intend to use Qualcomm IPC router protocol. The
protocol is used to communicate with services provided by other
hardware blocks in the system.
@@ -17,13 +17,13 @@ if QRTR
config QRTR_SMD
tristate "SMD IPC Router channels"
depends on RPMSG || (COMPILE_TEST && RPMSG=n)
- ---help---
+ help
Say Y here to support SMD based ipcrouter channels. SMD is the
most common transport for IPC Router.
config QRTR_TUN
tristate "TUN device for Qualcomm IPC Router"
- ---help---
+ help
Say Y here to expose a character device that allows user space to
implement endpoints of QRTR, for purpose of tunneling data to other
hosts or testing purposes.
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 2d8d6131bc5f..300a104b9a0f 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -166,6 +166,7 @@ static void __qrtr_node_release(struct kref *kref)
{
struct qrtr_node *node = container_of(kref, struct qrtr_node, ref);
struct radix_tree_iter iter;
+ struct qrtr_tx_flow *flow;
unsigned long flags;
void __rcu **slot;
@@ -181,8 +182,9 @@ static void __qrtr_node_release(struct kref *kref)
/* Free tx flow counters */
radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
+ flow = *slot;
radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot);
- kfree(*slot);
+ kfree(flow);
}
kfree(node);
}
@@ -427,7 +429,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
unsigned int ver;
size_t hdrlen;
- if (len & 3)
+ if (len == 0 || len & 3)
return -EINVAL;
skb = netdev_alloc_skb(NULL, len);
@@ -441,6 +443,8 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
switch (ver) {
case QRTR_PROTO_VER_1:
+ if (len < sizeof(*v1))
+ goto err;
v1 = data;
hdrlen = sizeof(*v1);
@@ -454,6 +458,8 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
size = le32_to_cpu(v1->size);
break;
case QRTR_PROTO_VER_2:
+ if (len < sizeof(*v2))
+ goto err;
v2 = data;
hdrlen = sizeof(*v2) + v2->optlen;
@@ -1174,6 +1180,7 @@ static int qrtr_release(struct socket *sock)
sk->sk_state_change(sk);
sock_set_flag(sk, SOCK_DEAD);
+ sock_orphan(sk);
sock->sk = NULL;
if (!sock_flag(sk, SOCK_ZAPPED))
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index c64e154bc18f..75cd696963b2 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -3,14 +3,14 @@
config RDS
tristate "The Reliable Datagram Sockets Protocol"
depends on INET
- ---help---
+ help
The RDS (Reliable Datagram Sockets) protocol provides reliable,
sequenced delivery of datagrams over Infiniband or TCP.
config RDS_RDMA
tristate "RDS over Infiniband"
depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
- ---help---
+ help
Allow RDS to use Infiniband as a transport.
This transport supports RDMA operations.
@@ -18,7 +18,7 @@ config RDS_TCP
tristate "RDS over TCP"
depends on RDS
depends on IPV6 || !IPV6
- ---help---
+ help
Allow RDS to use TCP as a transport.
This transport does not support RDMA operations.
diff --git a/net/rds/connection.c b/net/rds/connection.c
index ed7f2133acc2..f2fcab182095 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -905,6 +905,17 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
}
EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
+/* Check connectivity of all paths
+ */
+void rds_check_all_paths(struct rds_connection *conn)
+{
+ int i = 0;
+
+ do {
+ rds_conn_path_connect_if_down(&conn->c_path[i]);
+ } while (++i < conn->c_npaths);
+}
+
void rds_conn_connect_if_down(struct rds_connection *conn)
{
WARN_ON(conn->c_trans->t_mp_capable);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 5ae069d39eab..8dfff43cf07f 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -264,7 +264,13 @@ struct rds_ib_device {
int *vector_load;
};
-#define ibdev_to_node(ibdev) dev_to_node((ibdev)->dev.parent)
+static inline int ibdev_to_node(struct ib_device *ibdev)
+{
+ struct device *parent;
+
+ parent = ibdev->dev.parent;
+ return parent ? dev_to_node(parent) : NUMA_NO_NODE;
+}
#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
/* bits for i_ack_flags */
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 6019b0c004a9..106e862996b9 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -778,6 +778,7 @@ void rds_conn_drop(struct rds_connection *conn);
void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
void rds_conn_connect_if_down(struct rds_connection *conn);
void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
+void rds_check_all_paths(struct rds_connection *conn);
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
@@ -823,6 +824,12 @@ rds_conn_path_up(struct rds_conn_path *cp)
}
static inline int
+rds_conn_path_down(struct rds_conn_path *cp)
+{
+ return atomic_read(&cp->cp_state) == RDS_CONN_DOWN;
+}
+
+static inline int
rds_conn_up(struct rds_connection *conn)
{
WARN_ON(conn->c_trans->t_mp_capable);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c8404971d5ab..aba4afe4dfed 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -450,12 +450,13 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
{
struct rds_notifier *notifier;
- struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */
+ struct rds_rdma_notify cmsg;
unsigned int count = 0, max_messages = ~0U;
unsigned long flags;
LIST_HEAD(copy);
int err = 0;
+ memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */
/* put_cmsg copies to user space and thus may sleep. We can't do this
* with rs_lock held, so first grab as many notifications as we can stuff
diff --git a/net/rds/send.c b/net/rds/send.c
index 68e2bdb08fd0..9a529a01cdc6 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1340,7 +1340,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
goto out;
}
- rds_conn_path_connect_if_down(cpath);
+ if (rds_conn_path_down(cpath))
+ rds_check_all_paths(conn);
ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
if (ret) {
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 46f709a4b577..f8001ec80867 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -38,6 +38,12 @@
#include "rds.h"
#include "loop.h"
+static char * const rds_trans_modules[] = {
+ [RDS_TRANS_IB] = "rds_rdma",
+ [RDS_TRANS_GAP] = NULL,
+ [RDS_TRANS_TCP] = "rds_tcp",
+};
+
static struct rds_transport *transports[RDS_TRANS_COUNT];
static DECLARE_RWSEM(rds_trans_sem);
@@ -110,18 +116,20 @@ struct rds_transport *rds_trans_get(int t_type)
{
struct rds_transport *ret = NULL;
struct rds_transport *trans;
- unsigned int i;
down_read(&rds_trans_sem);
- for (i = 0; i < RDS_TRANS_COUNT; i++) {
- trans = transports[i];
-
- if (trans && trans->t_type == t_type &&
- (!trans->t_owner || try_module_get(trans->t_owner))) {
- ret = trans;
- break;
- }
+ trans = transports[t_type];
+ if (!trans) {
+ up_read(&rds_trans_sem);
+ if (rds_trans_modules[t_type])
+ request_module(rds_trans_modules[t_type]);
+ down_read(&rds_trans_sem);
+ trans = transports[t_type];
}
+ if (trans && trans->t_type == t_type &&
+ (!trans->t_owner || try_module_get(trans->t_owner)))
+ ret = trans;
+
up_read(&rds_trans_sem);
return ret;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index e7a872207b46..ce85656ac9c1 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -71,6 +71,7 @@ ax25_address rose_callsign;
* separate class since they always nest.
*/
static struct lock_class_key rose_netdev_xmit_lock_key;
+static struct lock_class_key rose_netdev_addr_lock_key;
static void rose_set_lockdep_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -81,6 +82,7 @@ static void rose_set_lockdep_one(struct net_device *dev,
static void rose_set_lockdep_key(struct net_device *dev)
{
+ lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 9fe264bec70c..9a2139ebd67d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -810,100 +810,6 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
}
/*
- * Transition a call to the complete state.
- */
-static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- if (call->state < RXRPC_CALL_COMPLETE) {
- call->abort_code = abort_code;
- call->error = error;
- call->completion = compl,
- call->state = RXRPC_CALL_COMPLETE;
- trace_rxrpc_call_complete(call);
- wake_up(&call->waitq);
- return true;
- }
- return false;
-}
-
-static inline bool rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- bool ret;
-
- write_lock_bh(&call->state_lock);
- ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
- write_unlock_bh(&call->state_lock);
- return ret;
-}
-
-/*
- * Record that a call successfully completed.
- */
-static inline bool __rxrpc_call_completed(struct rxrpc_call *call)
-{
- return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
-}
-
-static inline bool rxrpc_call_completed(struct rxrpc_call *call)
-{
- bool ret;
-
- write_lock_bh(&call->state_lock);
- ret = __rxrpc_call_completed(call);
- write_unlock_bh(&call->state_lock);
- return ret;
-}
-
-/*
- * Record that a call is locally aborted.
- */
-static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq,
- u32 abort_code, int error)
-{
- trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
- abort_code, error);
- return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
- abort_code, error);
-}
-
-static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq, u32 abort_code, int error)
-{
- bool ret;
-
- write_lock_bh(&call->state_lock);
- ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
- write_unlock_bh(&call->state_lock);
- return ret;
-}
-
-/*
- * Abort a call due to a protocol error.
- */
-static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
- struct sk_buff *skb,
- const char *eproto_why,
- const char *why,
- u32 abort_code)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
- return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
-}
-
-#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
- __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
- (abort_why), (abort_code))
-
-/*
* conn_client.c
*/
extern unsigned int rxrpc_max_client_connections;
@@ -1101,9 +1007,34 @@ extern const struct seq_operations rxrpc_peer_seq_ops;
* recvmsg.c
*/
void rxrpc_notify_socket(struct rxrpc_call *);
+bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
+bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
+bool __rxrpc_call_completed(struct rxrpc_call *);
+bool rxrpc_call_completed(struct rxrpc_call *);
+bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
+bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
+ * Abort a call due to a protocol error.
+ */
+static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ const char *eproto_why,
+ const char *why,
+ u32 abort_code)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
+ return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+}
+
+#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
+ __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
+ (abort_why), (abort_code))
+
+/*
* rtt.c
*/
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index b7611cc159e5..032ed76c0166 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -22,6 +22,11 @@
#include <net/ip.h>
#include "ar-internal.h"
+static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call,
+ unsigned long user_call_ID)
+{
+}
+
/*
* Preallocate a single service call, connection and peer and, if possible,
* give them a user ID and attach the user's side of the ID to them.
@@ -228,6 +233,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
if (rx->discard_new_call) {
_debug("discard %lx", call->user_call_ID);
rx->discard_new_call(call, call->user_call_ID);
+ if (call->notify_rx)
+ call->notify_rx = rxrpc_dummy_notify;
rxrpc_put_call(call, rxrpc_call_put_kernel);
}
rxrpc_call_completed(call);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 2a65ac41055f..6be2672a65ea 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -248,7 +248,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
if (anno_type != RXRPC_TX_ANNO_RETRANS)
continue;
+ /* We need to reset the retransmission state, but we need to do
+ * so before we drop the lock as a new ACK/NAK may come in and
+ * confuse things
+ */
+ annotation &= ~RXRPC_TX_ANNO_MASK;
+ annotation |= RXRPC_TX_ANNO_UNACK | RXRPC_TX_ANNO_RESENT;
+ call->rxtx_annotations[ix] = annotation;
+
skb = call->rxtx_buffer[ix];
+ if (!skb)
+ continue;
+
rxrpc_get_skb(skb, rxrpc_skb_got);
spin_unlock_bh(&call->lock);
@@ -262,24 +273,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
rxrpc_free_skb(skb, rxrpc_skb_freed);
spin_lock_bh(&call->lock);
-
- /* We need to clear the retransmit state, but there are two
- * things we need to be aware of: A new ACK/NAK might have been
- * received and the packet might have been hard-ACK'd (in which
- * case it will no longer be in the buffer).
- */
- if (after(seq, call->tx_hard_ack)) {
- annotation = call->rxtx_annotations[ix];
- anno_type = annotation & RXRPC_TX_ANNO_MASK;
- if (anno_type == RXRPC_TX_ANNO_RETRANS ||
- anno_type == RXRPC_TX_ANNO_NAK) {
- annotation &= ~RXRPC_TX_ANNO_MASK;
- annotation |= RXRPC_TX_ANNO_UNACK;
- }
- annotation |= RXRPC_TX_ANNO_RESENT;
- call->rxtx_annotations[ix] = annotation;
- }
-
if (after(call->tx_hard_ack, seq))
seq = call->tx_hard_ack;
}
@@ -320,7 +313,6 @@ recheck_state:
if (call->state == RXRPC_CALL_COMPLETE) {
del_timer_sync(&call->timer);
- rxrpc_notify_socket(call);
goto out_put;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f07970207b54..38a46167523f 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -288,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
*/
ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
if (ret < 0)
- goto error;
+ goto error_attached_to_socket;
trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
atomic_read(&call->usage), here, NULL);
@@ -308,18 +308,29 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
error_dup_user_ID:
write_unlock(&rx->call_lock);
release_sock(&rx->sk);
- ret = -EEXIST;
-
-error:
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- RX_CALL_DEAD, ret);
+ RX_CALL_DEAD, -EEXIST);
trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- atomic_read(&call->usage), here, ERR_PTR(ret));
+ atomic_read(&call->usage), here, ERR_PTR(-EEXIST));
rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
+ _leave(" = -EEXIST");
+ return ERR_PTR(-EEXIST);
+
+ /* We got an error, but the call is attached to the socket and is in
+ * need of release. However, we might now race with recvmsg() when
+ * completing the call queues it. Return 0 from sys_sendmsg() and
+ * leave the error to recvmsg() to deal with.
+ */
+error_attached_to_socket:
+ trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+ atomic_read(&call->usage), here, ERR_PTR(ret));
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_CALL_DEAD, ret);
+ _leave(" = c=%08x [err]", call->debug_id);
+ return call;
}
/*
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 06fcff2ebbba..447f55ca6886 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -173,10 +173,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
else
trace_rxrpc_rx_abort(call, serial,
conn->abort_code);
- if (rxrpc_set_call_completion(call, compl,
- conn->abort_code,
- conn->error))
- rxrpc_notify_socket(call);
+ rxrpc_set_call_completion(call, compl,
+ conn->abort_code,
+ conn->error);
}
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 19e141eeed17..8cbe0bf20ed5 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -212,9 +212,11 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
call->peer->cong_cwnd = call->cong_cwnd;
- spin_lock_bh(&conn->params.peer->lock);
- hlist_del_rcu(&call->error_link);
- spin_unlock_bh(&conn->params.peer->lock);
+ if (!hlist_unhashed(&call->error_link)) {
+ spin_lock_bh(&call->peer->lock);
+ hlist_del_rcu(&call->error_link);
+ spin_unlock_bh(&call->peer->lock);
+ }
if (rxrpc_is_client_call(call))
return rxrpc_disconnect_client_call(call);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 3be4177baf70..767579328a06 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -275,7 +275,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
case RXRPC_CALL_SERVER_AWAIT_ACK:
__rxrpc_call_completed(call);
- rxrpc_notify_socket(call);
state = call->state;
break;
@@ -723,13 +722,12 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
rwind, ntohl(ackinfo->jumbo_max));
+ if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+ rwind = RXRPC_RXTX_BUFF_SIZE - 1;
if (call->tx_winsize != rwind) {
- if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
- rwind = RXRPC_RXTX_BUFF_SIZE - 1;
if (rwind > call->tx_winsize)
wake = true;
- trace_rxrpc_rx_rwind_change(call, sp->hdr.serial,
- ntohl(ackinfo->rwind), wake);
+ trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, rwind, wake);
call->tx_winsize = rwind;
}
@@ -1013,9 +1011,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
- if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, -ECONNABORTED))
- rxrpc_notify_socket(call);
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ abort_code, -ECONNABORTED);
}
/*
@@ -1102,7 +1099,6 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx,
spin_lock(&rx->incoming_lock);
__rxrpc_disconnect_call(conn, call);
spin_unlock(&rx->incoming_lock);
- rxrpc_notify_socket(call);
}
/*
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 112e490ebbcd..a852f46d5234 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -292,9 +292,7 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
rxrpc_see_call(call);
- if (call->state < RXRPC_CALL_COMPLETE &&
- rxrpc_set_call_completion(call, compl, 0, -error))
- rxrpc_notify_socket(call);
+ rxrpc_set_call_completion(call, compl, 0, -error);
}
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 8578c39ec839..efecc5a8f67d 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -59,6 +59,85 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
}
/*
+ * Transition a call to the complete state.
+ */
+bool __rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error)
+{
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ call->abort_code = abort_code;
+ call->error = error;
+ call->completion = compl,
+ call->state = RXRPC_CALL_COMPLETE;
+ trace_rxrpc_call_complete(call);
+ wake_up(&call->waitq);
+ rxrpc_notify_socket(call);
+ return true;
+ }
+ return false;
+}
+
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error)
+{
+ bool ret = false;
+
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ write_lock_bh(&call->state_lock);
+ ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
+ write_unlock_bh(&call->state_lock);
+ }
+ return ret;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+bool __rxrpc_call_completed(struct rxrpc_call *call)
+{
+ return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
+}
+
+bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+ bool ret = false;
+
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ write_lock_bh(&call->state_lock);
+ ret = __rxrpc_call_completed(call);
+ write_unlock_bh(&call->state_lock);
+ }
+ return ret;
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+ rxrpc_seq_t seq, u32 abort_code, int error)
+{
+ trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
+ abort_code, error);
+ return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+ abort_code, error);
+}
+
+bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+ rxrpc_seq_t seq, u32 abort_code, int error)
+{
+ bool ret;
+
+ write_lock_bh(&call->state_lock);
+ ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
+ write_unlock_bh(&call->state_lock);
+ return ret;
+}
+
+/*
* Pass a call terminating message to userspace.
*/
static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
@@ -464,7 +543,7 @@ try_again:
list_empty(&rx->recvmsg_q) &&
rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
release_sock(&rx->sk);
- return -ENODATA;
+ return -EAGAIN;
}
if (list_empty(&rx->recvmsg_q)) {
@@ -541,7 +620,7 @@ try_again:
goto error_unlock_call;
}
- if (msg->msg_name) {
+ if (msg->msg_name && call->peer) {
struct sockaddr_rxrpc *srx = msg->msg_name;
size_t len = sizeof(call->peer->srx);
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 5e9c43d4a314..f3f6da6e4ad2 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -261,10 +261,8 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
case -ENETUNREACH:
case -EHOSTUNREACH:
case -ECONNREFUSED:
- rxrpc_set_call_completion(call,
- RXRPC_CALL_LOCAL_ERROR,
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
0, ret);
- rxrpc_notify_socket(call);
goto out;
}
_debug("need instant resend %d", ret);
@@ -306,7 +304,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
return -EPIPE;
more = msg->msg_flags & MSG_MORE;
@@ -683,6 +681,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
if (IS_ERR(call))
return PTR_ERR(call);
/* ... and we have the call lock. */
+ ret = 0;
+ if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE)
+ goto out_put_unlock;
} else {
switch (READ_ONCE(call->state)) {
case RXRPC_CALL_UNINITIALISED:
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f20073f4f84..84badf00647e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -6,7 +6,7 @@
menuconfig NET_SCHED
bool "QoS and/or fair queueing"
select NET_SCH_FIFO
- ---help---
+ help
When the kernel has several packets to send out over a network
device, it has to decide which ones to send first, which ones to
delay, and which ones to drop. This is the job of the queueing
@@ -47,7 +47,7 @@ comment "Queueing/Scheduling"
config NET_SCH_CBQ
tristate "Class Based Queueing (CBQ)"
- ---help---
+ help
Say Y here if you want to use the Class-Based Queueing (CBQ) packet
scheduling algorithm. This algorithm classifies the waiting packets
into a tree-like hierarchy of classes; the leaves of this tree are
@@ -64,7 +64,7 @@ config NET_SCH_CBQ
config NET_SCH_HTB
tristate "Hierarchical Token Bucket (HTB)"
- ---help---
+ help
Say Y here if you want to use the Hierarchical Token Buckets (HTB)
packet scheduling algorithm. See
<http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and
@@ -78,7 +78,7 @@ config NET_SCH_HTB
config NET_SCH_HFSC
tristate "Hierarchical Fair Service Curve (HFSC)"
- ---help---
+ help
Say Y here if you want to use the Hierarchical Fair Service Curve
(HFSC) packet scheduling algorithm.
@@ -88,7 +88,7 @@ config NET_SCH_HFSC
config NET_SCH_ATM
tristate "ATM Virtual Circuits (ATM)"
depends on ATM
- ---help---
+ help
Say Y here if you want to use the ATM pseudo-scheduler. This
provides a framework for invoking classifiers, which in turn
select classes of this queuing discipline. Each class maps
@@ -101,7 +101,7 @@ config NET_SCH_ATM
config NET_SCH_PRIO
tristate "Multi Band Priority Queueing (PRIO)"
- ---help---
+ help
Say Y here if you want to use an n-band priority queue packet
scheduler.
@@ -110,7 +110,7 @@ config NET_SCH_PRIO
config NET_SCH_MULTIQ
tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
- ---help---
+ help
Say Y here if you want to use an n-band queue packet scheduler
to support devices that have multiple hardware transmit queues.
@@ -119,7 +119,7 @@ config NET_SCH_MULTIQ
config NET_SCH_RED
tristate "Random Early Detection (RED)"
- ---help---
+ help
Say Y here if you want to use the Random Early Detection (RED)
packet scheduling algorithm.
@@ -130,7 +130,7 @@ config NET_SCH_RED
config NET_SCH_SFB
tristate "Stochastic Fair Blue (SFB)"
- ---help---
+ help
Say Y here if you want to use the Stochastic Fair Blue (SFB)
packet scheduling algorithm.
@@ -141,7 +141,7 @@ config NET_SCH_SFB
config NET_SCH_SFQ
tristate "Stochastic Fairness Queueing (SFQ)"
- ---help---
+ help
Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
packet scheduling algorithm.
@@ -152,7 +152,7 @@ config NET_SCH_SFQ
config NET_SCH_TEQL
tristate "True Link Equalizer (TEQL)"
- ---help---
+ help
Say Y here if you want to use the True Link Equalizer (TLE) packet
scheduling algorithm. This queueing discipline allows the combination
of several physical devices into one virtual device.
@@ -164,7 +164,7 @@ config NET_SCH_TEQL
config NET_SCH_TBF
tristate "Token Bucket Filter (TBF)"
- ---help---
+ help
Say Y here if you want to use the Token Bucket Filter (TBF) packet
scheduling algorithm.
@@ -175,7 +175,7 @@ config NET_SCH_TBF
config NET_SCH_CBS
tristate "Credit Based Shaper (CBS)"
- ---help---
+ help
Say Y here if you want to use the Credit Based Shaper (CBS) packet
scheduling algorithm.
@@ -208,7 +208,7 @@ config NET_SCH_TAPRIO
config NET_SCH_GRED
tristate "Generic Random Early Detection (GRED)"
- ---help---
+ help
Say Y here if you want to use the Generic Random Early Detection
(GRED) packet scheduling algorithm for some of your network devices
(see the top of <file:net/sched/sch_red.c> for details and
@@ -219,7 +219,7 @@ config NET_SCH_GRED
config NET_SCH_DSMARK
tristate "Differentiated Services marker (DSMARK)"
- ---help---
+ help
Say Y if you want to schedule packets according to the
Differentiated Services architecture proposed in RFC 2475.
Technical information on this method, with pointers to associated
@@ -230,7 +230,7 @@ config NET_SCH_DSMARK
config NET_SCH_NETEM
tristate "Network emulator (NETEM)"
- ---help---
+ help
Say Y if you want to emulate network delay, loss, and packet
re-ordering. This is often useful to simulate networks when
testing applications or protocols.
@@ -384,7 +384,7 @@ config NET_SCH_INGRESS
depends on NET_CLS_ACT
select NET_INGRESS
select NET_EGRESS
- ---help---
+ help
Say Y here if you want to use classifiers for incoming and/or outgoing
packets. This qdisc doesn't do anything else besides running classifiers,
which can also have actions attached to them. In case of outgoing packets,
@@ -398,7 +398,7 @@ config NET_SCH_INGRESS
config NET_SCH_PLUG
tristate "Plug network traffic until release (PLUG)"
- ---help---
+ help
This queuing discipline allows userspace to plug/unplug a network
output queue, using the netlink interface. When it receives an
@@ -441,7 +441,7 @@ config NET_SCH_ETS
menuconfig NET_SCH_DEFAULT
bool "Allow override default queue discipline"
- ---help---
+ help
Support for selection of default queuing discipline.
Nearly all users can safely say no here, and the default
@@ -492,7 +492,7 @@ config NET_CLS
config NET_CLS_BASIC
tristate "Elementary classification (BASIC)"
select NET_CLS
- ---help---
+ help
Say Y here if you want to be able to classify packets using
only extended matches and actions.
@@ -502,7 +502,7 @@ config NET_CLS_BASIC
config NET_CLS_TCINDEX
tristate "Traffic-Control Index (TCINDEX)"
select NET_CLS
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
traffic control indices. You will want this feature if you want
to implement Differentiated Services together with DSMARK.
@@ -515,7 +515,7 @@ config NET_CLS_ROUTE4
depends on INET
select IP_ROUTE_CLASSID
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets
according to the route table entry they matched.
@@ -525,7 +525,7 @@ config NET_CLS_ROUTE4
config NET_CLS_FW
tristate "Netfilter mark (FW)"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets
according to netfilter/firewall marks.
@@ -535,7 +535,7 @@ config NET_CLS_FW
config NET_CLS_U32
tristate "Universal 32bit comparisons w/ hashing (U32)"
select NET_CLS
- ---help---
+ help
Say Y here to be able to classify packets using a universal
32bit pieces based comparison scheme.
@@ -545,20 +545,20 @@ config NET_CLS_U32
config CLS_U32_PERF
bool "Performance counters support"
depends on NET_CLS_U32
- ---help---
+ help
Say Y here to make u32 gather additional statistics useful for
fine tuning u32 classifiers.
config CLS_U32_MARK
bool "Netfilter marks support"
depends on NET_CLS_U32
- ---help---
+ help
Say Y here to be able to use netfilter marks as u32 key.
config NET_CLS_RSVP
tristate "IPv4 Resource Reservation Protocol (RSVP)"
select NET_CLS
- ---help---
+ help
The Resource Reservation Protocol (RSVP) permits end systems to
request a minimum and maximum data flow rate for a connection; this
is important for real time data such as streaming sound or video.
@@ -572,7 +572,7 @@ config NET_CLS_RSVP
config NET_CLS_RSVP6
tristate "IPv6 Resource Reservation Protocol (RSVP6)"
select NET_CLS
- ---help---
+ help
The Resource Reservation Protocol (RSVP) permits end systems to
request a minimum and maximum data flow rate for a connection; this
is important for real time data such as streaming sound or video.
@@ -586,7 +586,7 @@ config NET_CLS_RSVP6
config NET_CLS_FLOW
tristate "Flow classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
a configurable combination of packet keys. This is mostly useful
in combination with SFQ.
@@ -599,7 +599,7 @@ config NET_CLS_CGROUP
select NET_CLS
select CGROUP_NET_CLASSID
depends on CGROUPS
- ---help---
+ help
Say Y here if you want to classify packets based on the control
cgroup of their process.
@@ -609,7 +609,7 @@ config NET_CLS_CGROUP
config NET_CLS_BPF
tristate "BPF-based classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
programmable BPF (JIT'ed) filters as an alternative to ematches.
@@ -619,7 +619,7 @@ config NET_CLS_BPF
config NET_CLS_FLOWER
tristate "Flower classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
a configurable combination of packet keys and masks.
@@ -629,7 +629,7 @@ config NET_CLS_FLOWER
config NET_CLS_MATCHALL
tristate "Match-all classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
nothing. Every packet will match.
@@ -639,7 +639,7 @@ config NET_CLS_MATCHALL
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
- ---help---
+ help
Say Y here if you want to use extended matches on top of classifiers
and select the extended matches below.
@@ -653,7 +653,7 @@ config NET_EMATCH_STACK
int "Stack size"
depends on NET_EMATCH
default "32"
- ---help---
+ help
Size of the local stack variable used while evaluating the tree of
ematches. Limits the depth of the tree, i.e. the number of
encapsulated precedences. Every level requires 4 bytes of additional
@@ -662,7 +662,7 @@ config NET_EMATCH_STACK
config NET_EMATCH_CMP
tristate "Simple packet data comparison"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
simple packet data comparisons for 8, 16, and 32bit values.
@@ -672,7 +672,7 @@ config NET_EMATCH_CMP
config NET_EMATCH_NBYTE
tristate "Multi byte comparison"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
multiple byte comparisons mainly useful for IPv6 address comparisons.
@@ -682,7 +682,7 @@ config NET_EMATCH_NBYTE
config NET_EMATCH_U32
tristate "U32 key"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets using
the famous u32 key in combination with logic relations.
@@ -692,7 +692,7 @@ config NET_EMATCH_U32
config NET_EMATCH_META
tristate "Metadata"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
metadata such as load average, netfilter attributes, socket
attributes and routing decisions.
@@ -707,7 +707,7 @@ config NET_EMATCH_TEXT
select TEXTSEARCH_KMP
select TEXTSEARCH_BM
select TEXTSEARCH_FSM
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
textsearch comparisons.
@@ -717,7 +717,7 @@ config NET_EMATCH_TEXT
config NET_EMATCH_CANID
tristate "CAN Identifier"
depends on NET_EMATCH && (CAN=y || CAN=m)
- ---help---
+ help
Say Y here if you want to be able to classify CAN frames based
on CAN Identifier.
@@ -727,7 +727,7 @@ config NET_EMATCH_CANID
config NET_EMATCH_IPSET
tristate "IPset"
depends on NET_EMATCH && IP_SET
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
ipset membership.
@@ -737,7 +737,7 @@ config NET_EMATCH_IPSET
config NET_EMATCH_IPT
tristate "IPtables Matches"
depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
- ---help---
+ help
Say Y here to be able to classify packets based on iptables
matches.
Current supported match is "policy" which allows packet classification
@@ -749,7 +749,7 @@ config NET_EMATCH_IPT
config NET_CLS_ACT
bool "Actions"
select NET_CLS
- ---help---
+ help
Say Y here if you want to use traffic control actions. Actions
get attached to classifiers and are invoked after a successful
classification. They are used to overwrite the classification
@@ -761,7 +761,7 @@ config NET_CLS_ACT
config NET_ACT_POLICE
tristate "Traffic Policing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here if you want to do traffic policing, i.e. strict
bandwidth limiting. This action replaces the existing policing
module.
@@ -772,7 +772,7 @@ config NET_ACT_POLICE
config NET_ACT_GACT
tristate "Generic actions"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to take generic actions such as dropping and
accepting packets.
@@ -782,13 +782,13 @@ config NET_ACT_GACT
config GACT_PROB
bool "Probability support"
depends on NET_ACT_GACT
- ---help---
+ help
Say Y here to use the generic action randomly or deterministically.
config NET_ACT_MIRRED
tristate "Redirecting and Mirroring"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to allow packets to be mirrored or redirected to
other devices.
@@ -799,7 +799,7 @@ config NET_ACT_SAMPLE
tristate "Traffic Sampling"
depends on NET_CLS_ACT
select PSAMPLE
- ---help---
+ help
Say Y here to allow packet sampling tc action. The packet sample
action consists of statistically choosing packets and sampling
them using the psample module.
@@ -810,7 +810,7 @@ config NET_ACT_SAMPLE
config NET_ACT_IPT
tristate "IPtables targets"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
- ---help---
+ help
Say Y here to be able to invoke iptables targets after successful
classification.
@@ -820,7 +820,7 @@ config NET_ACT_IPT
config NET_ACT_NAT
tristate "Stateless NAT"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to do stateless NAT on IPv4 packets. You should use
netfilter for NAT unless you know what you are doing.
@@ -830,7 +830,7 @@ config NET_ACT_NAT
config NET_ACT_PEDIT
tristate "Packet Editing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here if you want to mangle the content of packets.
To compile this code as a module, choose M here: the
@@ -839,7 +839,7 @@ config NET_ACT_PEDIT
config NET_ACT_SIMP
tristate "Simple Example (Debug)"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to add a simple action for demonstration purposes.
It is meant as an example and for debugging purposes. It will
print a configured policy string followed by the packet count
@@ -853,7 +853,7 @@ config NET_ACT_SIMP
config NET_ACT_SKBEDIT
tristate "SKB Editing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to change skb priority or queue_mapping settings.
If unsure, say N.
@@ -865,7 +865,7 @@ config NET_ACT_CSUM
tristate "Checksum Updating"
depends on NET_CLS_ACT && INET
select LIBCRC32C
- ---help---
+ help
Say Y here to update some common checksum after some direct
packet alterations.
@@ -886,7 +886,7 @@ config NET_ACT_MPLS
config NET_ACT_VLAN
tristate "Vlan manipulation"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to push or pop vlan headers.
If unsure, say N.
@@ -897,7 +897,7 @@ config NET_ACT_VLAN
config NET_ACT_BPF
tristate "BPF based action"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to execute BPF code on packets. The BPF code will decide
if the packet should be dropped or not.
@@ -910,7 +910,7 @@ config NET_ACT_CONNMARK
tristate "Netfilter Connection Mark Retriever"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
depends on NF_CONNTRACK && NF_CONNTRACK_MARK
- ---help---
+ help
Say Y here to allow retrieving of conn mark
If unsure, say N.
@@ -938,7 +938,7 @@ config NET_ACT_CTINFO
config NET_ACT_SKBMOD
tristate "skb data modification action"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to allow modification of skb data
If unsure, say N.
@@ -950,7 +950,7 @@ config NET_ACT_IFE
tristate "Inter-FE action based on IETF ForCES InterFE LFB"
depends on NET_CLS_ACT
select NET_IFE
- ---help---
+ help
Say Y here to allow for sourcing and terminating metadata
For details refer to netdev01 paper:
"Distributing Linux Traffic Control Classifier-Action Subsystem"
@@ -962,7 +962,7 @@ config NET_ACT_IFE
config NET_ACT_TUNNEL_KEY
tristate "IP tunnel metadata manipulation"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to set/release ip tunnel metadata.
If unsure, say N.
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 43a243081e7d..f901421b0634 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -43,17 +43,20 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&ca->tcf_tm);
bstats_update(&ca->tcf_bstats, skb);
- if (skb->protocol == htons(ETH_P_IP)) {
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
if (skb->len < sizeof(struct iphdr))
goto out;
proto = NFPROTO_IPV4;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ break;
+ case htons(ETH_P_IPV6):
if (skb->len < sizeof(struct ipv6hdr))
goto out;
proto = NFPROTO_IPV6;
- } else {
+ break;
+ default:
goto out;
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index cb8608f0a77a..c60674cf25c4 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -587,7 +587,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
goto drop;
update_flags = params->update_flags;
- protocol = tc_skb_protocol(skb);
+ protocol = skb_protocol(skb, false);
again:
switch (protocol) {
case cpu_to_be16(ETH_P_IP):
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index e29f0f45d688..6ed1652d1e26 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -624,7 +624,7 @@ static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
u8 family = NFPROTO_UNSPEC;
- switch (skb->protocol) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
family = NFPROTO_IPV4;
break;
@@ -673,9 +673,10 @@ static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag)
}
static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
- u8 family, u16 zone)
+ u8 family, u16 zone, bool *defrag)
{
enum ip_conntrack_info ctinfo;
+ struct qdisc_skb_cb cb;
struct nf_conn *ct;
int err = 0;
bool frag;
@@ -693,6 +694,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
skb_get(skb);
+ cb = *qdisc_skb_cb(skb);
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
@@ -703,6 +705,9 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
local_bh_enable();
if (err && err != -EINPROGRESS)
goto out_free;
+
+ if (!err)
+ *defrag = true;
} else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
@@ -711,12 +716,16 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
err = nf_ct_frag6_gather(net, skb, user);
if (err && err != -EINPROGRESS)
goto out_free;
+
+ if (!err)
+ *defrag = true;
#else
err = -EOPNOTSUPP;
goto out_free;
#endif
}
+ *qdisc_skb_cb(skb) = cb;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@@ -748,6 +757,7 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype)
{
+ __be16 proto = skb_protocol(skb, true);
int hooknum, err = NF_ACCEPT;
/* See HOOK2MANIP(). */
@@ -759,14 +769,13 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
switch (ctinfo) {
case IP_CT_RELATED:
case IP_CT_RELATED_REPLY:
- if (skb->protocol == htons(ETH_P_IP) &&
+ if (proto == htons(ETH_P_IP) &&
ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
hooknum))
err = NF_DROP;
goto out;
- } else if (IS_ENABLED(CONFIG_IPV6) &&
- skb->protocol == htons(ETH_P_IPV6)) {
+ } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
__be16 frag_off;
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
int hdrlen = ipv6_skip_exthdr(skb,
@@ -914,6 +923,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
int nh_ofs, err, retval;
struct tcf_ct_params *p;
bool skip_add = false;
+ bool defrag = false;
struct nf_conn *ct;
u8 family;
@@ -925,6 +935,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
force = p->ct_action & TCA_CT_ACT_FORCE;
tmpl = p->tmpl;
+ tcf_lastuse_update(&c->tcf_tm);
+
if (clear) {
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
@@ -944,7 +956,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
*/
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
- err = tcf_ct_handle_fragments(net, skb, family, p->zone);
+ err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
if (err == -EINPROGRESS) {
retval = TC_ACT_STOLEN;
goto out;
@@ -1012,6 +1024,8 @@ out_push:
out:
tcf_action_update_bstats(&c->common, skb);
+ if (defrag)
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
return retval;
drop:
@@ -1529,10 +1543,10 @@ static int __init ct_init_module(void)
return 0;
-err_tbl_init:
- destroy_workqueue(act_ct_wq);
err_register:
tcf_ct_flow_tables_uninit();
+err_tbl_init:
+ destroy_workqueue(act_ct_wq);
return err;
}
@@ -1543,17 +1557,6 @@ static void __exit ct_cleanup_module(void)
destroy_workqueue(act_ct_wq);
}
-void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie)
-{
- enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK;
- struct nf_conn *ct;
-
- ct = (struct nf_conn *)(cookie & NFCT_PTRMASK);
- nf_conntrack_get(&ct->ct_general);
- nf_ct_set(skb, ct, ctinfo);
-}
-EXPORT_SYMBOL_GPL(tcf_ct_flow_table_restore_skb);
-
module_init(ct_init_module);
module_exit(ct_cleanup_module);
MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>");
@@ -1561,4 +1564,3 @@ MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>");
MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>");
MODULE_DESCRIPTION("Connection tracking action");
MODULE_LICENSE("GPL v2");
-
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 19649623493b..b5042f3ea079 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -96,19 +96,22 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
action = READ_ONCE(ca->tcf_action);
wlen = skb_network_offset(skb);
- if (tc_skb_protocol(skb) == htons(ETH_P_IP)) {
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
wlen += sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen))
goto out;
proto = NFPROTO_IPV4;
- } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) {
+ break;
+ case htons(ETH_P_IPV6):
wlen += sizeof(struct ipv6hdr);
if (!pskb_may_pull(skb, wlen))
goto out;
proto = NFPROTO_IPV6;
- } else {
+ break;
+ default:
goto out;
}
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 9c628591f452..323ae7f6315d 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -32,7 +32,7 @@ static ktime_t gate_get_time(struct tcf_gate *gact)
return KTIME_MAX;
}
-static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
{
struct tcf_gate_params *param = &gact->param;
ktime_t now, base, cycle;
@@ -43,18 +43,13 @@ static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
if (ktime_after(base, now)) {
*start = base;
- return 0;
+ return;
}
cycle = param->tcfg_cycletime;
- /* cycle time should not be zero */
- if (!cycle)
- return -EFAULT;
-
n = div64_u64(ktime_sub_ns(now, base), cycle);
*start = ktime_add_ns(base, (n + 1) * cycle);
- return 0;
}
static void gate_start_timer(struct tcf_gate *gact, ktime_t start)
@@ -277,6 +272,27 @@ release_list:
return err;
}
+static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
+ enum tk_offsets tko, s32 clockid,
+ bool do_init)
+{
+ if (!do_init) {
+ if (basetime == gact->param.tcfg_basetime &&
+ tko == gact->tk_offset &&
+ clockid == gact->param.tcfg_clockid)
+ return;
+
+ spin_unlock_bh(&gact->tcf_lock);
+ hrtimer_cancel(&gact->hitimer);
+ spin_lock_bh(&gact->tcf_lock);
+ }
+ gact->param.tcfg_basetime = basetime;
+ gact->param.tcfg_clockid = clockid;
+ gact->tk_offset = tko;
+ hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
+ gact->hitimer.function = gate_timer_func;
+}
+
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
@@ -287,12 +303,12 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
enum tk_offsets tk_offset = TK_OFFS_TAI;
struct nlattr *tb[TCA_GATE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
+ u64 cycletime = 0, basetime = 0;
struct tcf_gate_params *p;
s32 clockid = CLOCK_TAI;
struct tcf_gate *gact;
struct tc_gate *parm;
int ret = 0, err;
- u64 basetime = 0;
u32 gflags = 0;
s32 prio = -1;
ktime_t start;
@@ -308,6 +324,27 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_GATE_PARMS])
return -EINVAL;
+ if (tb[TCA_GATE_CLOCKID]) {
+ clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ tk_offset = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ tk_offset = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+ tk_offset = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+ tk_offset = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ return -EINVAL;
+ }
+ }
+
parm = nla_data(tb[TCA_GATE_PARMS]);
index = parm->index;
@@ -331,10 +368,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
- if (ret == ACT_P_CREATED) {
- to_gate(*a)->param.tcfg_clockid = -1;
- INIT_LIST_HEAD(&(to_gate(*a)->param.entries));
- }
if (tb[TCA_GATE_PRIORITY])
prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
@@ -345,41 +378,19 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (tb[TCA_GATE_FLAGS])
gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
- if (tb[TCA_GATE_CLOCKID]) {
- clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
- switch (clockid) {
- case CLOCK_REALTIME:
- tk_offset = TK_OFFS_REAL;
- break;
- case CLOCK_MONOTONIC:
- tk_offset = TK_OFFS_MAX;
- break;
- case CLOCK_BOOTTIME:
- tk_offset = TK_OFFS_BOOT;
- break;
- case CLOCK_TAI:
- tk_offset = TK_OFFS_TAI;
- break;
- default:
- NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
- goto release_idr;
- }
- }
+ gact = to_gate(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&gact->param.entries);
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
- gact = to_gate(*a);
-
spin_lock_bh(&gact->tcf_lock);
p = &gact->param;
- if (tb[TCA_GATE_CYCLE_TIME]) {
- p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
- if (!p->tcfg_cycletime_ext)
- goto chain_put;
- }
+ if (tb[TCA_GATE_CYCLE_TIME])
+ cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
if (tb[TCA_GATE_ENTRY_LIST]) {
err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
@@ -387,35 +398,29 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
goto chain_put;
}
- if (!p->tcfg_cycletime) {
+ if (!cycletime) {
struct tcfg_gate_entry *entry;
ktime_t cycle = 0;
list_for_each_entry(entry, &p->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
- p->tcfg_cycletime = cycle;
+ cycletime = cycle;
+ if (!cycletime) {
+ err = -EINVAL;
+ goto chain_put;
+ }
}
+ p->tcfg_cycletime = cycletime;
if (tb[TCA_GATE_CYCLE_TIME_EXT])
p->tcfg_cycletime_ext =
nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+ gate_setup_timer(gact, basetime, tk_offset, clockid,
+ ret == ACT_P_CREATED);
p->tcfg_priority = prio;
- p->tcfg_basetime = basetime;
- p->tcfg_clockid = clockid;
p->tcfg_flags = gflags;
-
- gact->tk_offset = tk_offset;
- hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
- gact->hitimer.function = gate_timer_func;
-
- err = gate_get_start_time(gact, &start);
- if (err < 0) {
- NL_SET_ERR_MSG(extack,
- "Internal error: failed get start time");
- release_entry_list(&p->entries);
- goto chain_put;
- }
+ gate_get_start_time(gact, &start);
gact->current_close_time = start;
gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
@@ -443,6 +448,13 @@ chain_put:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
+ /* action is not inserted in any list: it's safe to init hitimer
+ * without taking tcf_lock.
+ */
+ if (ret == ACT_P_CREATED)
+ gate_setup_timer(gact, gact->param.tcfg_basetime,
+ gact->tk_offset, gact->param.tcfg_clockid,
+ true);
tcf_idr_release(*a, bind);
return err;
}
@@ -453,9 +465,7 @@ static void tcf_gate_cleanup(struct tc_action *a)
struct tcf_gate_params *p;
p = &gact->param;
- if (p->tcfg_clockid != -1)
- hrtimer_cancel(&gact->hitimer);
-
+ hrtimer_cancel(&gact->hitimer);
release_entry_list(&p->entries);
}
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index be3f215cd027..8118e2640979 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -82,7 +82,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
goto drop;
break;
case TCA_MPLS_ACT_PUSH:
- new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol));
+ new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb_protocol(skb, true)));
if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len,
skb->dev && skb->dev->type == ARPHRD_ETHER))
goto drop;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index b125b2be4467..b2b3faa57294 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -41,7 +41,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
int wlen = skb_network_offset(skb);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
wlen += sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen))
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a00a203b2ef5..4619cb3cb0a8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -20,7 +20,6 @@
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
-#include <linux/rhashtable.h>
#include <linux/jhash.h>
#include <linux/rculist.h>
#include <net/net_namespace.h>
@@ -652,6 +651,7 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
&block->flow_block, tcf_block_shared(block),
&extack);
down_write(&block->cb_lock);
+ list_del(&block_cb->driver_list);
list_move(&block_cb->list, &bo.cb_list);
up_write(&block->cb_lock);
rtnl_lock();
@@ -671,25 +671,29 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
struct netlink_ext_ack *extack)
{
struct flow_block_offload bo = {};
- int err;
tcf_block_offload_init(&bo, dev, command, ei->binder_type,
&block->flow_block, tcf_block_shared(block),
extack);
- if (dev->netdev_ops->ndo_setup_tc)
+ if (dev->netdev_ops->ndo_setup_tc) {
+ int err;
+
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
- else
- err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block,
- &bo, tc_block_indr_cleanup);
+ if (err < 0) {
+ if (err != -EOPNOTSUPP)
+ NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
+ return err;
+ }
- if (err < 0) {
- if (err != -EOPNOTSUPP)
- NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
- return err;
+ return tcf_block_setup(block, &bo);
}
- return tcf_block_setup(block, &bo);
+ flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block, &bo,
+ tc_block_indr_cleanup);
+ tcf_block_setup(block, &bo);
+
+ return -EOPNOTSUPP;
}
static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
@@ -1533,7 +1537,7 @@ static inline int __tcf_classify(struct sk_buff *skb,
reclassify:
#endif
for (; tp; tp = rcu_dereference_bh(tp->next)) {
- __be16 protocol = tc_skb_protocol(skb);
+ __be16 protocol = skb_protocol(skb, false);
int err;
if (tp->protocol != protocol &&
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 80ae7b9fa90a..87398af2715a 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -80,7 +80,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
if (dst)
return ntohl(dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
+ return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true);
}
static u32 flow_get_proto(const struct sk_buff *skb,
@@ -104,7 +104,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb,
if (flow->ports.ports)
return ntohs(flow->ports.dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
+ return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true);
}
static u32 flow_get_iif(const struct sk_buff *skb)
@@ -151,7 +151,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
static u32 flow_get_nfct_src(const struct sk_buff *skb,
const struct flow_keys *flow)
{
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, src.u3.ip));
case htons(ETH_P_IPV6):
@@ -164,7 +164,7 @@ fallback:
static u32 flow_get_nfct_dst(const struct sk_buff *skb,
const struct flow_keys *flow)
{
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, dst.u3.ip));
case htons(ETH_P_IPV6):
@@ -225,7 +225,7 @@ static u32 flow_get_skgid(const struct sk_buff *skb)
static u32 flow_get_vlan_tag(const struct sk_buff *skb)
{
- u16 uninitialized_var(tag);
+ u16 tag;
if (vlan_get_tag(skb, &tag) < 0)
return 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b2da37286082..e30bd969fc48 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -313,7 +313,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
/* skb_flow_dissect() does not set n_proto in case an unknown
* protocol, so do it rather here.
*/
- skb_key.basic.n_proto = skb->protocol;
+ skb_key.basic.n_proto = skb_protocol(skb, false);
skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
fl_ct_info_to_flower_map,
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index df00566d327d..c95cf86fb431 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
};
int ret, network_offset;
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
state.pf = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index 18755d29fd15..3650117da47f 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -212,7 +212,7 @@ static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
struct nf_hook_state state;
int ret;
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
return 0;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index d99966a55c84..46254968d390 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -195,7 +195,7 @@ META_COLLECTOR(int_priority)
META_COLLECTOR(int_protocol)
{
/* Let userspace take care of the byte ordering */
- dst->value = tc_skb_protocol(skb);
+ dst->value = skb_protocol(skb, false);
}
META_COLLECTOR(int_pkttype)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ee12ca9f55b4..1c281cc81f57 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -553,16 +553,16 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
+ p->link.vcc = NULL;
+ p->link.sock = NULL;
+ p->link.common.classid = sch->handle;
+ p->link.ref = 1;
err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
extack);
if (err)
return err;
- p->link.vcc = NULL;
- p->link.sock = NULL;
- p->link.common.classid = sch->handle;
- p->link.ref = 1;
tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
return 0;
}
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 60f8ae578819..c7b587897585 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -592,7 +592,7 @@ static bool cake_update_flowkeys(struct flow_keys *keys,
bool rev = !skb->_nfct, upd = false;
__be32 ip;
- if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+ if (skb_protocol(skb, true) != htons(ETH_P_IP))
return false;
if (!nf_ct_get_tuple_skb(&tuple, skb))
@@ -1551,32 +1551,51 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
return idx + (tin << 16);
}
-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
{
- int wlen = skb_network_offset(skb);
+ const int offset = skb_network_offset(skb);
+ u16 *buf, buf_;
u8 dscp;
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
- wlen += sizeof(struct iphdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+ if (unlikely(!buf))
return 0;
- dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
- if (wash && dscp)
+ /* ToS is in the second byte of iphdr */
+ dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
+
+ if (wash && dscp) {
+ const int wlen = offset + sizeof(struct iphdr);
+
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+ }
+
return dscp;
case htons(ETH_P_IPV6):
- wlen += sizeof(struct ipv6hdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+ if (unlikely(!buf))
return 0;
- dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
- if (wash && dscp)
+ /* Traffic class is in the first and second bytes of ipv6hdr */
+ dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
+
+ if (wash && dscp) {
+ const int wlen = offset + sizeof(struct ipv6hdr);
+
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+ }
+
return dscp;
case htons(ETH_P_ARP):
@@ -1593,14 +1612,17 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
{
struct cake_sched_data *q = qdisc_priv(sch);
u32 tin, mark;
+ bool wash;
u8 dscp;
/* Tin selection: Default to diffserv-based selection, allow overriding
- * using firewall marks or skb->priority.
+ * using firewall marks or skb->priority. Call DSCP parsing early if
+ * wash is enabled, otherwise defer to below to skip unneeded parsing.
*/
- dscp = cake_handle_diffserv(skb,
- q->rate_flags & CAKE_FLAG_WASH);
mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
+ wash = !!(q->rate_flags & CAKE_FLAG_WASH);
+ if (wash)
+ dscp = cake_handle_diffserv(skb, wash);
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
@@ -1614,6 +1636,8 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
else {
+ if (!wash)
+ dscp = cake_handle_diffserv(skb, wash);
tin = q->tin_index[dscp];
if (unlikely(tin >= q->tin_cnt))
@@ -1668,7 +1692,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct cake_sched_data *q = qdisc_priv(sch);
int len = qdisc_pkt_len(skb);
- int uninitialized_var(ret);
+ int ret;
struct sk_buff *ack = NULL;
ktime_t now = ktime_get();
struct cake_tin_data *b;
@@ -2691,7 +2715,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
qdisc_watchdog_init(&q->watchdog, sch);
if (opt) {
- int err = cake_change(sch, opt, extack);
+ err = cake_change(sch, opt, extack);
if (err)
return err;
@@ -3008,7 +3032,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
PUT_STAT_S32(BLUE_TIMER_US,
ktime_to_us(
ktime_sub(now,
- flow->cvars.blue_timer)));
+ flow->cvars.blue_timer)));
}
if (flow->cvars.dropping) {
PUT_STAT_S32(DROP_NEXT_US,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 39b427dc7512..ce4519358106 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -360,7 +360,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- int uninitialized_var(ret);
+ int ret;
struct cbq_class *cl = cbq_classify(skb, sch, &ret);
#ifdef CONFIG_NET_CLS_ACT
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 05605b30bef3..2b88710994d7 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (p->set_tc_index) {
int wlen = skb_network_offset(skb);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
wlen += sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen) ||
@@ -303,7 +303,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
index = skb->tc_index & (p->indices - 1);
pr_debug("index %d->%d\n", skb->tc_index, index);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
p->mv[index].value);
@@ -320,7 +320,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
*/
if (p->mv[index].mask != 0xff || p->mv[index].value)
pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(tc_skb_protocol(skb)));
+ __func__, ntohs(skb_protocol(skb, true)));
break;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 8f06a808c59a..2fb76fc0cc31 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1075,3 +1075,4 @@ module_init(fq_module_init)
module_exit(fq_module_exit)
MODULE_AUTHOR("Eric Dumazet");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Fair Queue Packet Scheduler");
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 436160be9c18..985d5208f563 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -187,7 +187,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct fq_codel_sched_data *q = qdisc_priv(sch);
unsigned int idx, prev_backlog, prev_qlen;
struct fq_codel_flow *flow;
- int uninitialized_var(ret);
+ int ret;
unsigned int pkt_len;
bool memory_limited;
@@ -721,3 +721,4 @@ module_init(fq_codel_module_init)
module_exit(fq_codel_module_exit)
MODULE_AUTHOR("Eric Dumazet");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Fair Queue CoDel discipline");
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index fb760cee824e..4d307f17e084 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -130,7 +130,7 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct fq_pie_sched_data *q = qdisc_priv(sch);
struct fq_pie_flow *sel_flow;
- int uninitialized_var(ret);
+ int ret;
u8 memory_limited = false;
u8 enqueue = false;
u32 pkt_len;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b19a0021a0bd..265a61d011df 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -464,6 +464,7 @@ void __netdev_watchdog_up(struct net_device *dev)
dev_hold(dev);
}
}
+EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
static void dev_watchdog_up(struct net_device *dev)
{
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 433f2190960f..92ad4115e473 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1533,7 +1533,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
struct hfsc_class *cl;
- int uninitialized_var(err);
+ int err;
bool first;
cl = hfsc_classify(skb, sch, &err);
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index be35f03b657b..420ede875322 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -721,3 +721,4 @@ module_exit(hhf_module_exit)
MODULE_AUTHOR("Terry Lam");
MODULE_AUTHOR("Nandita Dukkipati");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)");
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 8184c87da8be..6feab225b4ba 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -579,7 +579,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- int uninitialized_var(ret);
+ int ret;
unsigned int len = qdisc_pkt_len(skb);
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = htb_classify(skb, sch, &ret);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 5a6def5e4e6d..15f400dcb400 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -349,7 +349,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
unsigned int hash, dropped;
sfq_index x, qlen;
struct sfq_slot *slot;
- int uninitialized_var(ret);
+ int ret;
struct sk_buff *head;
int delta;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 689ef6f3ded8..2f1f0a378408 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -239,7 +239,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
char haddr[MAX_ADDR_LEN];
neigh_ha_snapshot(haddr, n, dev);
- err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
+ err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
haddr, NULL, skb->len);
if (err < 0)
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 68934438ee19..39d7fa9569f8 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -11,7 +11,7 @@ menuconfig IP_SCTP
select CRYPTO_HMAC
select CRYPTO_SHA1
select LIBCRC32C
- ---help---
+ help
Stream Control Transmission Protocol
From RFC 2960 <http://www.ietf.org/rfc/rfc2960.txt>.
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 72315137d7e7..8d735461fa19 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1565,12 +1565,15 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
enum sctp_scope scope, gfp_t gfp)
{
+ struct sock *sk = asoc->base.sk;
int flags;
/* Use scoping rules to determine the subset of addresses from
* the endpoint.
*/
- flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
+ flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
+ if (!inet_v6_ipv6only(sk))
+ flags |= SCTP_ADDR4_ALLOWED;
if (asoc->peer.ipv4_address)
flags |= SCTP_ADDR4_PEERSUPP;
if (asoc->peer.ipv6_address)
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 53bc61537f44..701c5a4e441d 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -461,6 +461,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
* well as the remote peer.
*/
if ((((AF_INET == addr->sa.sa_family) &&
+ (flags & SCTP_ADDR4_ALLOWED) &&
(flags & SCTP_ADDR4_PEERSUPP))) ||
(((AF_INET6 == addr->sa.sa_family) &&
(flags & SCTP_ADDR6_ALLOWED) &&
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 092d1afdee0d..cde29f3c7fb3 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -148,7 +148,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
* sock as well as the remote peer.
*/
if (addr->a.sa.sa_family == AF_INET &&
- !(copy_flags & SCTP_ADDR4_PEERSUPP))
+ (!(copy_flags & SCTP_ADDR4_ALLOWED) ||
+ !(copy_flags & SCTP_ADDR4_PEERSUPP)))
continue;
if (addr->a.sa.sa_family == AF_INET6 &&
(!(copy_flags & SCTP_ADDR6_ALLOWED) ||
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 67f7e71f9129..bda2536dd740 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -22,17 +22,11 @@
#include <net/sctp/sm.h>
#include <net/sctp/stream_sched.h>
-/* Migrates chunks from stream queues to new stream queues if needed,
- * but not across associations. Also, removes those chunks to streams
- * higher than the new max.
- */
-static void sctp_stream_outq_migrate(struct sctp_stream *stream,
- struct sctp_stream *new, __u16 outcnt)
+static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt)
{
struct sctp_association *asoc;
struct sctp_chunk *ch, *temp;
struct sctp_outq *outq;
- int i;
asoc = container_of(stream, struct sctp_association, stream);
outq = &asoc->outqueue;
@@ -56,6 +50,19 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
sctp_chunk_free(ch);
}
+}
+
+/* Migrates chunks from stream queues to new stream queues if needed,
+ * but not across associations. Also, removes those chunks to streams
+ * higher than the new max.
+ */
+static void sctp_stream_outq_migrate(struct sctp_stream *stream,
+ struct sctp_stream *new, __u16 outcnt)
+{
+ int i;
+
+ if (stream->outcnt > outcnt)
+ sctp_stream_shrink_out(stream, outcnt);
if (new) {
/* Here we actually move the old ext stuff into the new
@@ -1037,11 +1044,13 @@ struct sctp_chunk *sctp_process_strreset_resp(
nums = ntohs(addstrm->number_of_streams);
number = stream->outcnt - nums;
- if (result == SCTP_STRRESET_PERFORMED)
+ if (result == SCTP_STRRESET_PERFORMED) {
for (i = number; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
- else
+ } else {
+ sctp_stream_shrink_out(stream, number);
stream->outcnt = number;
+ }
*evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
0, nums, GFP_ATOMIC);
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index f54a70b8da82..1ab3c5a2c5ad 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -2,7 +2,7 @@
config SMC
tristate "SMC socket protocol family"
depends on INET && INFINIBAND
- ---help---
+ help
SMC-R provides a "sockets over RDMA" solution making use of
RDMA over Converged Ethernet (RoCE) technology to upgrade
AF_INET TCP connections transparently.
@@ -14,7 +14,7 @@ config SMC
config SMC_DIAG
tristate "SMC: socket monitoring interface"
depends on SMC
- ---help---
+ help
Support for SMC socket monitoring interface used by tools such as
smcss.
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 903321543838..1163d51196da 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -126,8 +126,10 @@ EXPORT_SYMBOL_GPL(smc_proto6);
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
- smc->clcsock->file->private_data = smc->sk.sk_socket;
- smc->clcsock->file = NULL;
+ if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
+ smc->clcsock->file->private_data = smc->sk.sk_socket;
+ smc->clcsock->file = NULL;
+ }
}
static int __smc_release(struct smc_sock *smc)
@@ -352,7 +354,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
*/
mutex_lock(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+ if (!smc_link_active(&lgr->lnk[i]))
continue;
rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
if (rc)
@@ -632,7 +634,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *l = &smc->conn.lgr->lnk[i];
- if (l->peer_qpn == ntoh24(aclc->qpn)) {
+ if (l->peer_qpn == ntoh24(aclc->qpn) &&
+ !memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) &&
+ !memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) {
link = l;
break;
}
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index a47e8855e045..ce468ff62a19 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -66,9 +66,13 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
wr_rdma_buf,
(struct smc_wr_tx_pend_priv **)pend);
- if (conn->killed)
+ if (conn->killed) {
/* abnormal termination */
+ if (!rc)
+ smc_wr_tx_put_slot(link,
+ (struct smc_wr_tx_pend_priv *)pend);
rc = -EPIPE;
+ }
return rc;
}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index d5627df24215..779f4142a11d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -27,6 +27,7 @@
#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
+#define SMC_CLC_RECV_BUF_LEN 100
/* eye catcher "SMCR" EBCDIC for CLC messages */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
@@ -36,7 +37,7 @@ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
-static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
+static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
{
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct smc_clc_msg_accept_confirm *clc;
@@ -49,12 +50,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return false;
switch (clcm->type) {
case SMC_CLC_PROPOSAL:
- if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
- clcm->path != SMC_TYPE_B)
- return false;
pclc = (struct smc_clc_msg_proposal *)clcm;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
- if (ntohs(pclc->hdr.length) !=
+ if (ntohs(pclc->hdr.length) <
sizeof(*pclc) + ntohs(pclc->iparea_offset) +
sizeof(*pclc_prfx) +
pclc_prfx->ipv6_prefixes_cnt *
@@ -86,7 +84,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
default:
return false;
}
- if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
+ if (check_trl &&
+ memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
return false;
return true;
@@ -276,7 +275,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
struct msghdr msg = {NULL, 0};
int reason_code = 0;
struct kvec vec = {buf, buflen};
- int len, datlen;
+ int len, datlen, recvlen;
+ bool check_trl = true;
int krflags;
/* peek the first few bytes to determine length of data to receive
@@ -320,10 +320,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
datlen = ntohs(clcm->length);
if ((len < sizeof(struct smc_clc_msg_hdr)) ||
- (datlen > buflen) ||
- (clcm->version != SMC_CLC_V1) ||
- (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
- clcm->path != SMC_TYPE_B) ||
+ (clcm->version < SMC_CLC_V1) ||
((clcm->type != SMC_CLC_DECLINE) &&
(clcm->type != expected_type))) {
smc->sk.sk_err = EPROTO;
@@ -331,16 +328,38 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
goto out;
}
+ if (clcm->type == SMC_CLC_PROPOSAL && clcm->path == SMC_TYPE_N)
+ reason_code = SMC_CLC_DECL_VERSMISMAT; /* just V2 offered */
+
/* receive the complete CLC message */
memset(&msg, 0, sizeof(struct msghdr));
- iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen);
+ if (datlen > buflen) {
+ check_trl = false;
+ recvlen = buflen;
+ } else {
+ recvlen = datlen;
+ }
+ iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen);
krflags = MSG_WAITALL;
len = sock_recvmsg(smc->clcsock, &msg, krflags);
- if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
+ if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) {
smc->sk.sk_err = EPROTO;
reason_code = -EPROTO;
goto out;
}
+ datlen -= len;
+ while (datlen) {
+ u8 tmp[SMC_CLC_RECV_BUF_LEN];
+
+ vec.iov_base = &tmp;
+ vec.iov_len = SMC_CLC_RECV_BUF_LEN;
+ /* receive remaining proposal message */
+ recvlen = datlen > SMC_CLC_RECV_BUF_LEN ?
+ SMC_CLC_RECV_BUF_LEN : datlen;
+ iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen);
+ len = sock_recvmsg(smc->clcsock, &msg, krflags);
+ datlen -= len;
+ }
if (clcm->type == SMC_CLC_DECLINE) {
struct smc_clc_msg_decline *dclc;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 465876701b75..76c2b150d040 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -25,6 +25,7 @@
#define SMC_CLC_V1 0x1 /* SMC version */
#define SMC_TYPE_R 0 /* SMC-R only */
#define SMC_TYPE_D 1 /* SMC-D only */
+#define SMC_TYPE_N 2 /* neither SMC-R nor SMC-D */
#define SMC_TYPE_B 3 /* SMC-R and SMC-D */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define CLC_WAIT_TIME_SHORT HZ /* short wait time on clcsock */
@@ -46,6 +47,7 @@
#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
#define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */
#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
+#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 7964a21e5e6f..f82a2e599917 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -15,6 +15,7 @@
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
+#include <linux/mutex.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
@@ -44,18 +45,10 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
-struct smc_ib_up_work {
- struct work_struct work;
- struct smc_link_group *lgr;
- struct smc_ib_device *smcibdev;
- u8 ibport;
-};
-
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
-static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);
/* return head of link group list and its lock for a given link group */
@@ -247,7 +240,8 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
if (smc_link_usable(lnk))
lnk->state = SMC_LNK_INACTIVE;
}
- wake_up_interruptible_all(&lgr->llc_waiter);
+ wake_up_all(&lgr->llc_msg_waiter);
+ wake_up_all(&lgr->llc_flow_waiter);
}
static void smc_lgr_free(struct smc_link_group *lgr);
@@ -324,7 +318,6 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
get_device(&ini->ib_dev->ibdev->dev);
atomic_inc(&ini->ib_dev->lnk_cnt);
- lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
lnk->link_idx = link_idx;
@@ -360,6 +353,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
rc = smc_wr_create_link(lnk);
if (rc)
goto destroy_qp;
+ lnk->state = SMC_LNK_ACTIVATING;
return 0;
destroy_qp:
@@ -450,7 +444,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
}
smc->conn.lgr = lgr;
spin_lock_bh(lgr_lock);
- list_add(&lgr->list, lgr_list);
+ list_add_tail(&lgr->list, lgr_list);
spin_unlock_bh(lgr_lock);
return 0;
@@ -548,8 +542,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
smc_wr_wakeup_tx_wait(from_lnk);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
- i == from_lnk->link_idx)
+ if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
continue;
if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
from_lnk->ibport == lgr->lnk[i].ibport) {
@@ -1104,66 +1097,23 @@ static void smc_conn_abort_work(struct work_struct *work)
sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}
-/* link is up - establish alternate link if applicable */
-static void smcr_link_up(struct smc_link_group *lgr,
- struct smc_ib_device *smcibdev, u8 ibport)
-{
- struct smc_link *link = NULL;
-
- if (list_empty(&lgr->list) ||
- lgr->type == SMC_LGR_SYMMETRIC ||
- lgr->type == SMC_LGR_ASYMMETRIC_PEER)
- return;
-
- if (lgr->role == SMC_SERV) {
- /* trigger local add link processing */
- link = smc_llc_usable_link(lgr);
- if (!link)
- return;
- smc_llc_srv_add_link_local(link);
- } else {
- /* invite server to start add link processing */
- u8 gid[SMC_GID_SIZE];
-
- if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
- NULL))
- return;
- if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
- /* some other llc task is ongoing */
- wait_event_interruptible_timeout(lgr->llc_waiter,
- (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
- SMC_LLC_WAIT_TIME);
- }
- if (list_empty(&lgr->list) ||
- !smc_ib_port_active(smcibdev, ibport))
- return; /* lgr or device no longer active */
- link = smc_llc_usable_link(lgr);
- if (!link)
- return;
- smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
- NULL, SMC_LLC_REQ);
- }
-}
-
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
- struct smc_ib_up_work *ib_work;
struct smc_link_group *lgr, *n;
list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+ struct smc_link *link;
+
if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
SMC_MAX_PNETID_LEN) ||
lgr->type == SMC_LGR_SYMMETRIC ||
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
continue;
- ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
- if (!ib_work)
- continue;
- INIT_WORK(&ib_work->work, smc_link_up_work);
- ib_work->lgr = lgr;
- ib_work->smcibdev = smcibdev;
- ib_work->ibport = ibport;
- schedule_work(&ib_work->work);
+
+ /* trigger local add link processing */
+ link = smc_llc_usable_link(lgr);
+ if (link)
+ smc_llc_add_link_local(link);
}
}
@@ -1195,13 +1145,19 @@ static void smcr_link_down(struct smc_link *lnk)
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* another llc task is ongoing */
mutex_unlock(&lgr->llc_conf_mutex);
- wait_event_interruptible_timeout(lgr->llc_waiter,
- (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+ wait_event_timeout(lgr->llc_flow_waiter,
+ (list_empty(&lgr->list) ||
+ lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
mutex_lock(&lgr->llc_conf_mutex);
}
- smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
- SMC_LLC_DEL_LOST_PATH);
+ if (!list_empty(&lgr->list)) {
+ smc_llc_send_delete_link(to_lnk, del_link_id,
+ SMC_LLC_REQ, true,
+ SMC_LLC_DEL_LOST_PATH);
+ smcr_link_clear(lnk, true);
+ }
+ wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
}
}
@@ -1240,20 +1196,6 @@ void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
}
}
-static void smc_link_up_work(struct work_struct *work)
-{
- struct smc_ib_up_work *ib_work = container_of(work,
- struct smc_ib_up_work,
- work);
- struct smc_link_group *lgr = ib_work->lgr;
-
- if (list_empty(&lgr->list))
- goto out;
- smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
-out:
- kfree(ib_work);
-}
-
static void smc_link_down_work(struct work_struct *work)
{
struct smc_link *link = container_of(work, struct smc_link,
@@ -1262,7 +1204,7 @@ static void smc_link_down_work(struct work_struct *work)
if (list_empty(&lgr->list))
return;
- wake_up_interruptible_all(&lgr->llc_waiter);
+ wake_up_all(&lgr->llc_msg_waiter);
mutex_lock(&lgr->llc_conf_mutex);
smcr_link_down(link);
mutex_unlock(&lgr->llc_conf_mutex);
@@ -1326,7 +1268,7 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
return false;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+ if (!smc_link_active(&lgr->lnk[i]))
continue;
if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
!memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
@@ -1369,7 +1311,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == ini->vlan_id &&
- (role == SMC_CLNT ||
+ (role == SMC_CLNT || ini->is_smcd ||
lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
/* link group found */
ini->cln_first_contact = SMC_REUSE_CONTACT;
@@ -1774,14 +1716,14 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
- if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
+ if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
return;
smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
- if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
+ if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
return;
smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
@@ -1793,7 +1735,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
if (!conn->lgr || conn->lgr->is_smcd)
return;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (!smc_link_usable(&conn->lgr->lnk[i]))
+ if (!smc_link_active(&conn->lgr->lnk[i]))
continue;
smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
DMA_FROM_DEVICE);
@@ -1807,7 +1749,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
if (!conn->lgr || conn->lgr->is_smcd)
return;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (!smc_link_usable(&conn->lgr->lnk[i]))
+ if (!smc_link_active(&conn->lgr->lnk[i]))
continue;
smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
DMA_FROM_DEVICE);
@@ -1830,8 +1772,12 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
return rc;
/* create rmb */
rc = __smc_buf_create(smc, is_smcd, true);
- if (rc)
+ if (rc) {
+ mutex_lock(&smc->conn.lgr->sndbufs_lock);
+ list_del(&smc->conn.sndbuf_desc->list);
+ mutex_unlock(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
+ }
return rc;
}
@@ -1955,20 +1901,20 @@ static void smc_core_going_away(void)
struct smc_ib_device *smcibdev;
struct smcd_dev *smcd;
- spin_lock(&smc_ib_devices.lock);
+ mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
int i;
for (i = 0; i < SMC_MAX_PORTS; i++)
set_bit(i, smcibdev->ports_going_away);
}
- spin_unlock(&smc_ib_devices.lock);
+ mutex_unlock(&smc_ib_devices.mutex);
- spin_lock(&smcd_dev_list.lock);
+ mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(smcd, &smcd_dev_list.list, list) {
smcd->going_away = 1;
}
- spin_unlock(&smcd_dev_list.lock);
+ mutex_unlock(&smcd_dev_list.mutex);
}
/* Clean up all SMC link groups */
@@ -1980,10 +1926,10 @@ static void smc_lgrs_shutdown(void)
smc_smcr_terminate_all(NULL);
- spin_lock(&smcd_dev_list.lock);
+ mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(smcd, &smcd_dev_list.list, list)
smc_smcd_terminate_all(smcd);
- spin_unlock(&smcd_dev_list.lock);
+ mutex_unlock(&smcd_dev_list.mutex);
}
static int smc_core_reboot_event(struct notifier_block *this,
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 86d160f0d187..1c4d5439d0ff 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -262,8 +262,10 @@ struct smc_link_group {
struct work_struct llc_del_link_work;
struct work_struct llc_event_work;
/* llc event worker */
- wait_queue_head_t llc_waiter;
+ wait_queue_head_t llc_flow_waiter;
/* w4 next llc event */
+ wait_queue_head_t llc_msg_waiter;
+ /* w4 next llc msg */
struct smc_llc_flow llc_flow_lcl;
/* llc local control field */
struct smc_llc_flow llc_flow_rmt;
@@ -347,6 +349,11 @@ static inline bool smc_link_usable(struct smc_link *lnk)
return true;
}
+static inline bool smc_link_active(struct smc_link *lnk)
+{
+ return lnk->state == SMC_LNK_ACTIVE;
+}
+
struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 562a52d01ad1..1c314dbdc7fa 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -16,6 +16,7 @@
#include <linux/workqueue.h>
#include <linux/scatterlist.h>
#include <linux/wait.h>
+#include <linux/mutex.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -33,7 +34,7 @@
#define SMC_QP_RNR_RETRY 7 /* 7: infinite */
struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */
- .lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock),
+ .mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex),
.list = LIST_HEAD_INIT(smc_ib_devices.list),
};
@@ -505,6 +506,10 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
int cqe_size_order, smc_order;
long rc;
+ mutex_lock(&smcibdev->mutex);
+ rc = 0;
+ if (smcibdev->initialized)
+ goto out;
/* the calculated number of cq entries fits to mlx5 cq allocation */
cqe_size_order = cache_line_size() == 128 ? 7 : 6;
smc_order = MAX_ORDER - cqe_size_order - 1;
@@ -516,7 +521,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send);
if (IS_ERR(smcibdev->roce_cq_send)) {
smcibdev->roce_cq_send = NULL;
- return rc;
+ goto out;
}
smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev,
smc_wr_rx_cq_handler, NULL,
@@ -528,21 +533,26 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
}
smc_wr_add_dev(smcibdev);
smcibdev->initialized = 1;
- return rc;
+ goto out;
err:
ib_destroy_cq(smcibdev->roce_cq_send);
+out:
+ mutex_unlock(&smcibdev->mutex);
return rc;
}
static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
{
+ mutex_lock(&smcibdev->mutex);
if (!smcibdev->initialized)
- return;
+ goto out;
smcibdev->initialized = 0;
ib_destroy_cq(smcibdev->roce_cq_recv);
ib_destroy_cq(smcibdev->roce_cq_send);
smc_wr_remove_dev(smcibdev);
+out:
+ mutex_unlock(&smcibdev->mutex);
}
static struct ib_client smc_ib_client;
@@ -565,9 +575,10 @@ static int smc_ib_add_dev(struct ib_device *ibdev)
INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
atomic_set(&smcibdev->lnk_cnt, 0);
init_waitqueue_head(&smcibdev->lnks_deleted);
- spin_lock(&smc_ib_devices.lock);
+ mutex_init(&smcibdev->mutex);
+ mutex_lock(&smc_ib_devices.mutex);
list_add_tail(&smcibdev->list, &smc_ib_devices.list);
- spin_unlock(&smc_ib_devices.lock);
+ mutex_unlock(&smc_ib_devices.mutex);
ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
smc_ib_global_event_handler);
@@ -602,9 +613,9 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
{
struct smc_ib_device *smcibdev = client_data;
- spin_lock(&smc_ib_devices.lock);
+ mutex_lock(&smc_ib_devices.mutex);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
- spin_unlock(&smc_ib_devices.lock);
+ mutex_unlock(&smc_ib_devices.mutex);
pr_warn_ratelimited("smc: removing ib device %s\n",
smcibdev->ibdev->name);
smc_smcr_terminate_all(smcibdev);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index e6a696ae15f3..2ce481187dd0 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/if_ether.h>
+#include <linux/mutex.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <net/smc.h>
@@ -25,7 +26,7 @@
struct smc_ib_devices { /* list of smc ib devices definition */
struct list_head list;
- spinlock_t lock; /* protects list of smc ib devices */
+ struct mutex mutex; /* protects list of smc ib devices */
};
extern struct smc_ib_devices smc_ib_devices; /* list of smc ib devices */
@@ -51,6 +52,7 @@ struct smc_ib_device { /* ib-device infos for smc */
DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS);
atomic_t lnk_cnt; /* number of links on ibdev */
wait_queue_head_t lnks_deleted; /* wait 4 removal of all links*/
+ struct mutex mutex; /* protect dev setup+cleanup */
};
struct smc_buf_desc;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 91f85fc09fb8..998c525de785 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -7,6 +7,7 @@
*/
#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include <linux/slab.h>
#include <asm/page.h>
@@ -17,7 +18,7 @@
struct smcd_dev_list smcd_dev_list = {
.list = LIST_HEAD_INIT(smcd_dev_list.list),
- .lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock)
+ .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex)
};
/* Test if an ISM communication is possible. */
@@ -317,9 +318,9 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev);
int smcd_register_dev(struct smcd_dev *smcd)
{
- spin_lock(&smcd_dev_list.lock);
+ mutex_lock(&smcd_dev_list.mutex);
list_add_tail(&smcd->list, &smcd_dev_list.list);
- spin_unlock(&smcd_dev_list.lock);
+ mutex_unlock(&smcd_dev_list.mutex);
pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
dev_name(&smcd->dev), smcd->pnetid,
@@ -333,9 +334,9 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
{
pr_warn_ratelimited("smc: removing smcd device %s\n",
dev_name(&smcd->dev));
- spin_lock(&smcd_dev_list.lock);
+ mutex_lock(&smcd_dev_list.mutex);
list_del_init(&smcd->list);
- spin_unlock(&smcd_dev_list.lock);
+ mutex_unlock(&smcd_dev_list.mutex);
smcd->going_away = 1;
smc_smcd_terminate_all(smcd);
flush_workqueue(smcd->event_wq);
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 4da946cbfa29..81cc4537efd3 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -10,12 +10,13 @@
#define SMCD_ISM_H
#include <linux/uio.h>
+#include <linux/mutex.h>
#include "smc.h"
struct smcd_dev_list { /* List of SMCD devices */
struct list_head list;
- spinlock_t lock; /* Protects list of devices */
+ struct mutex mutex; /* Protects list of devices */
};
extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 391237b601fe..df5b0a6ea848 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -186,6 +186,26 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
flow->qentry = qentry;
}
+static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
+ struct smc_llc_qentry *qentry)
+{
+ u8 msg_type = qentry->msg.raw.hdr.common.type;
+
+ if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) &&
+ flow_type != msg_type && !lgr->delayed_event) {
+ lgr->delayed_event = qentry;
+ return;
+ }
+ /* drop parallel or already-in-progress llc requests */
+ if (flow_type != msg_type)
+ pr_warn_once("smc: SMC-R lg %*phN dropped parallel "
+ "LLC msg: msg %d flow %d role %d\n",
+ SMC_LGR_ID_SIZE, &lgr->id,
+ qentry->msg.raw.hdr.common.type,
+ flow_type, lgr->role);
+ kfree(qentry);
+}
+
/* try to start a new llc flow, initiated by an incoming llc msg */
static bool smc_llc_flow_start(struct smc_llc_flow *flow,
struct smc_llc_qentry *qentry)
@@ -195,14 +215,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
spin_lock_bh(&lgr->llc_flow_lock);
if (flow->type) {
/* a flow is already active */
- if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK ||
- qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) &&
- !lgr->delayed_event) {
- lgr->delayed_event = qentry;
- } else {
- /* forget this llc request */
- kfree(qentry);
- }
+ smc_llc_flow_parallel(lgr, flow->type, qentry);
spin_unlock_bh(&lgr->llc_flow_lock);
return false;
}
@@ -222,8 +235,8 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
}
if (qentry == lgr->delayed_event)
lgr->delayed_event = NULL;
- spin_unlock_bh(&lgr->llc_flow_lock);
smc_llc_flow_qentry_set(flow, qentry);
+ spin_unlock_bh(&lgr->llc_flow_lock);
return true;
}
@@ -251,11 +264,11 @@ again:
return 0;
}
spin_unlock_bh(&lgr->llc_flow_lock);
- rc = wait_event_interruptible_timeout(lgr->llc_waiter,
- (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
- (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
- lgr->llc_flow_rmt.type == allowed_remote)),
- SMC_LLC_WAIT_TIME);
+ rc = wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) ||
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
+ (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
+ lgr->llc_flow_rmt.type == allowed_remote))),
+ SMC_LLC_WAIT_TIME * 10);
if (!rc)
return -ETIMEDOUT;
goto again;
@@ -272,7 +285,7 @@ void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
flow == &lgr->llc_flow_lcl)
schedule_work(&lgr->llc_event_work);
else
- wake_up_interruptible(&lgr->llc_waiter);
+ wake_up(&lgr->llc_flow_waiter);
}
/* lnk is optional and used for early wakeup when link goes down, useful in
@@ -283,26 +296,32 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
int time_out, u8 exp_msg)
{
struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
+ u8 rcv_msg;
- wait_event_interruptible_timeout(lgr->llc_waiter,
- (flow->qentry ||
- (lnk && !smc_link_usable(lnk)) ||
- list_empty(&lgr->list)),
- time_out);
+ wait_event_timeout(lgr->llc_msg_waiter,
+ (flow->qentry ||
+ (lnk && !smc_link_usable(lnk)) ||
+ list_empty(&lgr->list)),
+ time_out);
if (!flow->qentry ||
(lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
smc_llc_flow_qentry_del(flow);
goto out;
}
- if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) {
+ rcv_msg = flow->qentry->msg.raw.hdr.common.type;
+ if (exp_msg && rcv_msg != exp_msg) {
if (exp_msg == SMC_LLC_ADD_LINK &&
- flow->qentry->msg.raw.hdr.common.type ==
- SMC_LLC_DELETE_LINK) {
+ rcv_msg == SMC_LLC_DELETE_LINK) {
/* flow_start will delay the unexpected msg */
smc_llc_flow_start(&lgr->llc_flow_lcl,
smc_llc_flow_qentry_clr(flow));
return NULL;
}
+ pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: "
+ "msg %d exp %d flow %d role %d flags %x\n",
+ SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg,
+ flow->type, lgr->role,
+ flow->qentry->msg.raw.hdr.flags);
smc_llc_flow_qentry_del(flow);
}
out:
@@ -409,7 +428,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
rtok_ix = 1;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
link = &send_link->lgr->lnk[i];
- if (link->state == SMC_LNK_ACTIVE && link != send_link) {
+ if (smc_link_active(link) && link != send_link) {
rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
rkeyllc->rtoken[rtok_ix].rmb_key =
htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
@@ -876,6 +895,36 @@ out:
return rc;
}
+/* as an SMC client, invite server to start the add_link processing */
+static void smc_llc_cli_add_link_invite(struct smc_link *link,
+ struct smc_llc_qentry *qentry)
+{
+ struct smc_link_group *lgr = smc_get_lgr(link);
+ struct smc_init_info ini;
+
+ if (lgr->type == SMC_LGR_SYMMETRIC ||
+ lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ goto out;
+
+ ini.vlan_id = lgr->vlan_id;
+ smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+ if (!ini.ib_dev)
+ goto out;
+
+ smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1],
+ ini.ib_gid, NULL, SMC_LLC_REQ);
+out:
+ kfree(qentry);
+}
+
+static bool smc_llc_is_local_add_link(union smc_llc_msg *llc)
+{
+ if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK &&
+ !llc->add_link.qp_mtu && !llc->add_link.link_num)
+ return true;
+ return false;
+}
+
static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
{
struct smc_llc_qentry *qentry;
@@ -883,7 +932,10 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
mutex_lock(&lgr->llc_conf_mutex);
- smc_llc_cli_add_link(qentry->link, qentry);
+ if (smc_llc_is_local_add_link(&qentry->msg))
+ smc_llc_cli_add_link_invite(qentry->link, qentry);
+ else
+ smc_llc_cli_add_link(qentry->link, qentry);
mutex_unlock(&lgr->llc_conf_mutex);
}
@@ -892,7 +944,7 @@ static int smc_llc_active_link_count(struct smc_link_group *lgr)
int i, link_count = 0;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (!smc_link_usable(&lgr->lnk[i]))
+ if (!smc_link_active(&lgr->lnk[i]))
continue;
link_count++;
}
@@ -1032,12 +1084,14 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
if (rc)
return -ENOLINK;
/* receive CONFIRM LINK response over the RoCE fabric */
- qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME,
- SMC_LLC_CONFIRM_LINK);
- if (!qentry) {
+ qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0);
+ if (!qentry ||
+ qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
/* send DELETE LINK */
smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
false, SMC_LLC_DEL_LOST_PATH);
+ if (qentry)
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
return -ENOLINK;
}
smc_llc_save_peer_uid(qentry);
@@ -1139,14 +1193,14 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
mutex_unlock(&lgr->llc_conf_mutex);
}
-/* enqueue a local add_link req to trigger a new add_link flow, only as SERV */
-void smc_llc_srv_add_link_local(struct smc_link *link)
+/* enqueue a local add_link req to trigger a new add_link flow */
+void smc_llc_add_link_local(struct smc_link *link)
{
struct smc_llc_msg_add_link add_llc = {0};
add_llc.hd.length = sizeof(add_llc);
add_llc.hd.common.type = SMC_LLC_ADD_LINK;
- /* no dev and port needed, we as server ignore client data anyway */
+ /* no dev and port needed */
smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
}
@@ -1222,8 +1276,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_llc_send_message(lnk, &qentry->msg); /* response */
if (smc_link_downing(&lnk_del->state)) {
- smc_switch_conns(lgr, lnk_del, false);
- smc_wr_tx_wait_no_pending_sends(lnk_del);
+ if (smc_switch_conns(lgr, lnk_del, false))
+ smc_wr_tx_wait_no_pending_sends(lnk_del);
}
smcr_link_clear(lnk_del, true);
@@ -1297,8 +1351,8 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
goto out; /* asymmetric link already deleted */
if (smc_link_downing(&lnk_del->state)) {
- smc_switch_conns(lgr, lnk_del, false);
- smc_wr_tx_wait_no_pending_sends(lnk_del);
+ if (smc_switch_conns(lgr, lnk_del, false))
+ smc_wr_tx_wait_no_pending_sends(lnk_del);
}
if (!list_empty(&lgr->list)) {
/* qentry is either a request from peer (send it back to
@@ -1326,7 +1380,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) {
/* trigger setup of asymm alt link */
- smc_llc_srv_add_link_local(lnk);
+ smc_llc_add_link_local(lnk);
}
out:
mutex_unlock(&lgr->llc_conf_mutex);
@@ -1455,11 +1509,22 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
if (list_empty(&lgr->list))
goto out; /* lgr is terminating */
if (lgr->role == SMC_CLNT) {
- if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) {
+ if (smc_llc_is_local_add_link(llc)) {
+ if (lgr->llc_flow_lcl.type ==
+ SMC_LLC_FLOW_ADD_LINK)
+ break; /* add_link in progress */
+ if (smc_llc_flow_start(&lgr->llc_flow_lcl,
+ qentry)) {
+ schedule_work(&lgr->llc_add_link_work);
+ }
+ return;
+ }
+ if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
+ !lgr->llc_flow_lcl.qentry) {
/* a flow is waiting for this message */
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
qentry);
- wake_up_interruptible(&lgr->llc_waiter);
+ wake_up(&lgr->llc_msg_waiter);
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
qentry)) {
schedule_work(&lgr->llc_add_link_work);
@@ -1474,33 +1539,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* a flow is waiting for this message */
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
- wake_up_interruptible(&lgr->llc_waiter);
+ wake_up(&lgr->llc_msg_waiter);
return;
}
break;
case SMC_LLC_DELETE_LINK:
- if (lgr->role == SMC_CLNT) {
- /* server requests to delete this link, send response */
- if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
- /* DEL LINK REQ during ADD LINK SEQ */
- smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
- qentry);
- wake_up_interruptible(&lgr->llc_waiter);
- } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
- qentry)) {
- schedule_work(&lgr->llc_del_link_work);
- }
- } else {
- if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
- !lgr->llc_flow_lcl.qentry) {
- /* DEL LINK REQ during ADD LINK SEQ */
- smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
- qentry);
- wake_up_interruptible(&lgr->llc_waiter);
- } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
- qentry)) {
- schedule_work(&lgr->llc_del_link_work);
- }
+ if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
+ !lgr->llc_flow_lcl.qentry) {
+ /* DEL LINK REQ during ADD LINK SEQ */
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
+ wake_up(&lgr->llc_msg_waiter);
+ } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
+ schedule_work(&lgr->llc_del_link_work);
}
return;
case SMC_LLC_CONFIRM_RKEY:
@@ -1566,23 +1616,30 @@ again:
static void smc_llc_rx_response(struct smc_link *link,
struct smc_llc_qentry *qentry)
{
+ enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type;
+ struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl;
u8 llc_type = qentry->msg.raw.hdr.common.type;
switch (llc_type) {
case SMC_LLC_TEST_LINK:
- if (link->state == SMC_LNK_ACTIVE)
+ if (smc_link_active(link))
complete(&link->llc_testlink_resp);
break;
case SMC_LLC_ADD_LINK:
- case SMC_LLC_DELETE_LINK:
- case SMC_LLC_CONFIRM_LINK:
case SMC_LLC_ADD_LINK_CONT:
+ case SMC_LLC_CONFIRM_LINK:
+ if (flowtype != SMC_LLC_FLOW_ADD_LINK || flow->qentry)
+ break; /* drop out-of-flow response */
+ goto assign;
+ case SMC_LLC_DELETE_LINK:
+ if (flowtype != SMC_LLC_FLOW_DEL_LINK || flow->qentry)
+ break; /* drop out-of-flow response */
+ goto assign;
case SMC_LLC_CONFIRM_RKEY:
case SMC_LLC_DELETE_RKEY:
- /* assign responses to the local flow, we requested them */
- smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
- wake_up_interruptible(&link->lgr->llc_waiter);
- return;
+ if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry)
+ break; /* drop out-of-flow response */
+ goto assign;
case SMC_LLC_CONFIRM_RKEY_CONT:
/* not used because max links is 3 */
break;
@@ -1591,6 +1648,11 @@ static void smc_llc_rx_response(struct smc_link *link,
break;
}
kfree(qentry);
+ return;
+assign:
+ /* assign responses to the local flow, we requested them */
+ smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
+ wake_up(&link->lgr->llc_msg_waiter);
}
static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
@@ -1616,7 +1678,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
list_add_tail(&qentry->list, &lgr->llc_event_q);
spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
- schedule_work(&link->lgr->llc_event_work);
+ schedule_work(&lgr->llc_event_work);
}
/* copy received msg and add it to the event queue */
@@ -1644,7 +1706,7 @@ static void smc_llc_testlink_work(struct work_struct *work)
u8 user_data[16] = { 0 };
int rc;
- if (link->state != SMC_LNK_ACTIVE)
+ if (!smc_link_active(link))
return; /* don't reschedule worker */
expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
if (time_is_after_jiffies(expire_time)) {
@@ -1656,7 +1718,7 @@ static void smc_llc_testlink_work(struct work_struct *work)
/* receive TEST LINK response over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
SMC_LLC_WAIT_TIME);
- if (link->state != SMC_LNK_ACTIVE)
+ if (!smc_link_active(link))
return; /* link state changed */
if (rc <= 0) {
smcr_link_down_cond_sched(link);
@@ -1677,7 +1739,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
INIT_LIST_HEAD(&lgr->llc_event_q);
spin_lock_init(&lgr->llc_event_q_lock);
spin_lock_init(&lgr->llc_flow_lock);
- init_waitqueue_head(&lgr->llc_waiter);
+ init_waitqueue_head(&lgr->llc_flow_waiter);
+ init_waitqueue_head(&lgr->llc_msg_waiter);
mutex_init(&lgr->llc_conf_mutex);
lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
}
@@ -1686,7 +1749,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
void smc_llc_lgr_clear(struct smc_link_group *lgr)
{
smc_llc_event_flush(lgr);
- wake_up_interruptible_all(&lgr->llc_waiter);
+ wake_up_all(&lgr->llc_flow_waiter);
+ wake_up_all(&lgr->llc_msg_waiter);
cancel_work_sync(&lgr->llc_event_work);
cancel_work_sync(&lgr->llc_add_link_work);
cancel_work_sync(&lgr->llc_del_link_work);
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index a5d2fe3eea61..cc00a2ec4e92 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -103,7 +103,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord,
u32 rsn);
int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
int smc_llc_srv_add_link(struct smc_link *link);
-void smc_llc_srv_add_link_local(struct smc_link *link);
+void smc_llc_add_link_local(struct smc_link *link);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 014d91b9778e..30e5fac7034e 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/list.h>
#include <linux/ctype.h>
+#include <linux/mutex.h>
#include <net/netlink.h>
#include <net/genetlink.h>
@@ -129,7 +130,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
return rc;
/* remove ib devices */
- spin_lock(&smc_ib_devices.lock);
+ mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
if (ibdev->pnetid_by_user[ibport] &&
@@ -149,9 +150,9 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
}
}
}
- spin_unlock(&smc_ib_devices.lock);
+ mutex_unlock(&smc_ib_devices.mutex);
/* remove smcd devices */
- spin_lock(&smcd_dev_list.lock);
+ mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
if (smcd_dev->pnetid_by_user &&
(!pnet_name ||
@@ -165,7 +166,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
rc = 0;
}
}
- spin_unlock(&smcd_dev_list.lock);
+ mutex_unlock(&smcd_dev_list.mutex);
return rc;
}
@@ -240,14 +241,14 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
bool applied = false;
- spin_lock(&smc_ib_devices.lock);
+ mutex_lock(&smc_ib_devices.mutex);
if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
SMC_MAX_PNETID_LEN);
ib_dev->pnetid_by_user[ib_port - 1] = true;
applied = true;
}
- spin_unlock(&smc_ib_devices.lock);
+ mutex_unlock(&smc_ib_devices.mutex);
return applied;
}
@@ -258,13 +259,13 @@ static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
bool applied = false;
- spin_lock(&smcd_dev_list.lock);
+ mutex_lock(&smcd_dev_list.mutex);
if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
smcd_dev->pnetid_by_user = true;
applied = true;
}
- spin_unlock(&smcd_dev_list.lock);
+ mutex_unlock(&smcd_dev_list.mutex);
return applied;
}
@@ -300,7 +301,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
{
struct smc_ib_device *ibdev;
- spin_lock(&smc_ib_devices.lock);
+ mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (!strncmp(ibdev->ibdev->name, ib_name,
sizeof(ibdev->ibdev->name)) ||
@@ -311,7 +312,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
}
ibdev = NULL;
out:
- spin_unlock(&smc_ib_devices.lock);
+ mutex_unlock(&smc_ib_devices.mutex);
return ibdev;
}
@@ -320,7 +321,7 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
{
struct smcd_dev *smcd_dev;
- spin_lock(&smcd_dev_list.lock);
+ mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
IB_DEVICE_NAME_MAX - 1))
@@ -328,7 +329,7 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
}
smcd_dev = NULL;
out:
- spin_unlock(&smcd_dev_list.lock);
+ mutex_unlock(&smcd_dev_list.mutex);
return smcd_dev;
}
@@ -825,7 +826,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
int i;
ini->ib_dev = NULL;
- spin_lock(&smc_ib_devices.lock);
+ mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (ibdev == known_dev)
continue;
@@ -844,7 +845,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
}
}
out:
- spin_unlock(&smc_ib_devices.lock);
+ mutex_unlock(&smc_ib_devices.mutex);
}
/* find alternate roce device with same pnet_id and vlan_id */
@@ -863,7 +864,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
{
struct smc_ib_device *ibdev;
- spin_lock(&smc_ib_devices.lock);
+ mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
struct net_device *ndev;
int i;
@@ -888,7 +889,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
}
}
}
- spin_unlock(&smc_ib_devices.lock);
+ mutex_unlock(&smc_ib_devices.mutex);
}
/* Determine the corresponding IB device port based on the hardware PNETID.
@@ -924,7 +925,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
return; /* pnetid could not be determined */
- spin_lock(&smcd_dev_list.lock);
+ mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
!ismdev->going_away) {
@@ -932,7 +933,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
break;
}
}
- spin_unlock(&smcd_dev_list.lock);
+ mutex_unlock(&smcd_dev_list.mutex);
}
/* PNET table analysis for a given sock:
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 7239ba9b99dc..1e23cdd41eb1 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -169,6 +169,8 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
*idx = link->wr_tx_cnt;
+ if (!smc_link_usable(link))
+ return -ENOLINK;
for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
if (!test_and_set_bit(*idx, link->wr_tx_mask))
return 0;
@@ -560,15 +562,15 @@ void smc_wr_free_link(struct smc_link *lnk)
{
struct ib_device *ibdev;
+ if (!lnk->smcibdev)
+ return;
+ ibdev = lnk->smcibdev->ibdev;
+
if (smc_wr_tx_wait_no_pending_sends(lnk))
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) *
sizeof(*lnk->wr_tx_mask));
- if (!lnk->smcibdev)
- return;
- ibdev = lnk->smcibdev->ibdev;
-
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 39e14d5edaf1..e9d0953522f0 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1317,6 +1317,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
q.len = strlen(gssd_dummy_clnt_dir[0].name);
clnt_dentry = d_hash_and_lookup(gssd_dentry, &q);
if (!clnt_dentry) {
+ __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
pipe_dentry = ERR_PTR(-ENOENT);
goto out;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5c4ec9386f81..d5805fa1d066 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -44,6 +44,7 @@
#include <net/tcp.h>
#include <net/tcp_states.h>
#include <linux/uaccess.h>
+#include <linux/highmem.h>
#include <asm/ioctls.h>
#include <linux/sunrpc/types.h>
@@ -563,7 +564,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
.msg_control = cmh,
.msg_controllen = sizeof(buffer),
};
- unsigned int uninitialized_var(sent);
+ unsigned int sent;
int err;
svc_udp_release_rqst(rqstp);
@@ -1080,7 +1081,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
struct msghdr msg = {
.msg_flags = 0,
};
- unsigned int uninitialized_var(sent);
+ unsigned int sent;
int err;
svc_tcp_release_rqst(rqstp);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6f7d82fb1eb0..be11d672b5b9 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1118,6 +1118,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->head[0].iov_len;
+ subbuf->head[0].iov_base = buf->head[0].iov_base;
subbuf->head[0].iov_len = 0;
}
@@ -1130,6 +1131,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->page_len;
+ subbuf->pages = buf->pages;
+ subbuf->page_base = 0;
subbuf->page_len = 0;
}
@@ -1141,6 +1144,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->tail[0].iov_len;
+ subbuf->tail[0].iov_base = buf->tail[0].iov_base;
subbuf->tail[0].iov_len = 0;
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ef997880e17a..b647562a26dd 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -367,7 +367,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
trace_xprtrdma_wc_fastreg(wc, frwr);
/* The MR will get recycled when the associated req is retransmitted */
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
@@ -452,7 +452,7 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
trace_xprtrdma_wc_li(wc, frwr);
__frwr_release_mr(wc, mr);
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
@@ -474,7 +474,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
__frwr_release_mr(wc, mr);
complete(&frwr->fr_linv_done);
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
@@ -582,7 +582,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
smp_rmb();
rpcrdma_complete_rqst(rep);
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2081c8fbfa48..453bacc99907 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -71,7 +71,7 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
size = RPCRDMA_HDRLEN_MIN;
/* Maximum Read list size */
- size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
+ size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
/* Minimal Read chunk size */
size += sizeof(__be32); /* segment count */
@@ -94,7 +94,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
size = RPCRDMA_HDRLEN_MIN;
/* Maximum Write list size */
- size = sizeof(__be32); /* segment count */
+ size += sizeof(__be32); /* segment count */
size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
@@ -1349,8 +1349,7 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
}
- r_xprt->rx_stats.bad_reply_count++;
- return -EREMOTEIO;
+ return -EIO;
}
/* Perform XID lookup, reconstruction of the RPC reply, and
@@ -1387,13 +1386,11 @@ out:
spin_unlock(&xprt->queue_lock);
return;
-/* If the incoming reply terminated a pending RPC, the next
- * RPC call will post a replacement receive buffer as it is
- * being marshaled.
- */
out_badheader:
trace_xprtrdma_reply_hdr(rep);
r_xprt->rx_stats.bad_reply_count++;
+ rqst->rq_task->tk_status = status;
+ status = 0;
goto out;
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 0c4af7f5e241..053c8ab1265a 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -242,13 +242,18 @@ xprt_rdma_connect_worker(struct work_struct *work)
rc = rpcrdma_xprt_connect(r_xprt);
xprt_clear_connecting(xprt);
- if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) {
+ if (!rc) {
xprt->connect_cookie++;
xprt->stat.connect_count++;
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
xprt_set_connected(xprt);
rc = -EAGAIN;
+ } else {
+ /* Force a call to xprt_rdma_close to clean up */
+ spin_lock(&xprt->transport_lock);
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ spin_unlock(&xprt->transport_lock);
}
xprt_wake_pending_tasks(xprt, rc);
}
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 2ae348377806..75c646743df3 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -84,7 +84,8 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
-static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep);
+static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
+static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
gfp_t flags);
@@ -97,7 +98,8 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
*/
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
- struct rdma_cm_id *id = r_xprt->rx_ep->re_id;
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
+ struct rdma_cm_id *id = ep->re_id;
/* Flush Receives, then wait for deferred Reply work
* to complete.
@@ -108,6 +110,8 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
* local invalidations.
*/
ib_drain_sq(id->qp);
+
+ rpcrdma_ep_put(ep);
}
/**
@@ -126,23 +130,27 @@ static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
trace_xprtrdma_qp_event(ep, event);
}
+/* Ensure xprt_force_disconnect() is invoked exactly once when a
+ * connection is closed or lost. (The important thing is it needs
+ * to be invoked "at least" once).
+ */
+static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
+{
+ if (atomic_add_unless(&ep->re_force_disconnect, 1, 1))
+ xprt_force_disconnect(ep->re_xprt);
+}
+
/**
* rpcrdma_flush_disconnect - Disconnect on flushed completion
- * @cq: completion queue
+ * @r_xprt: transport to disconnect
* @wc: work completion entry
*
* Must be called in process context.
*/
-void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc)
+void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc)
{
- struct rpcrdma_xprt *r_xprt = cq->cq_context;
- struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-
- if (wc->status != IB_WC_SUCCESS &&
- r_xprt->rx_ep->re_connect_status == 1) {
- r_xprt->rx_ep->re_connect_status = -ECONNABORTED;
- xprt_force_disconnect(xprt);
- }
+ if (wc->status != IB_WC_SUCCESS)
+ rpcrdma_force_disconnect(r_xprt->rx_ep);
}
/**
@@ -156,11 +164,12 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct rpcrdma_sendctx *sc =
container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
+ struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
- rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc);
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_sendctx_put_locked(r_xprt, sc);
+ rpcrdma_flush_disconnect(r_xprt, wc);
}
/**
@@ -195,7 +204,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
return;
out_flushed:
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(r_xprt, wc);
rpcrdma_rep_destroy(rep);
}
@@ -239,7 +248,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
struct rpcrdma_ep *ep = id->context;
- struct rpc_xprt *xprt = ep->re_xprt;
might_sleep();
@@ -263,10 +271,9 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
/* fall through */
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
- xprt_force_disconnect(xprt);
goto disconnected;
case RDMA_CM_EVENT_ESTABLISHED:
- kref_get(&ep->re_kref);
+ rpcrdma_ep_get(ep);
ep->re_connect_status = 1;
rpcrdma_update_cm_private(ep, &event->param.conn);
trace_xprtrdma_inline_thresh(ep);
@@ -274,22 +281,24 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
break;
case RDMA_CM_EVENT_CONNECT_ERROR:
ep->re_connect_status = -ENOTCONN;
- goto disconnected;
+ goto wake_connect_worker;
case RDMA_CM_EVENT_UNREACHABLE:
ep->re_connect_status = -ENETUNREACH;
- goto disconnected;
+ goto wake_connect_worker;
case RDMA_CM_EVENT_REJECTED:
dprintk("rpcrdma: connection to %pISpc rejected: %s\n",
sap, rdma_reject_msg(id, event->status));
ep->re_connect_status = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
- ep->re_connect_status = -EAGAIN;
- goto disconnected;
+ ep->re_connect_status = -ENOTCONN;
+wake_connect_worker:
+ wake_up_all(&ep->re_connect_wait);
+ return 0;
case RDMA_CM_EVENT_DISCONNECTED:
ep->re_connect_status = -ECONNABORTED;
disconnected:
- xprt_force_disconnect(xprt);
- return rpcrdma_ep_destroy(ep);
+ rpcrdma_force_disconnect(ep);
+ return rpcrdma_ep_put(ep);
default:
break;
}
@@ -345,7 +354,7 @@ out:
return ERR_PTR(rc);
}
-static void rpcrdma_ep_put(struct kref *kref)
+static void rpcrdma_ep_destroy(struct kref *kref)
{
struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref);
@@ -369,13 +378,18 @@ static void rpcrdma_ep_put(struct kref *kref)
module_put(THIS_MODULE);
}
+static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep)
+{
+ kref_get(&ep->re_kref);
+}
+
/* Returns:
* %0 if @ep still has a positive kref count, or
* %1 if @ep was destroyed successfully.
*/
-static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep)
+static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep)
{
- return kref_put(&ep->re_kref, rpcrdma_ep_put);
+ return kref_put(&ep->re_kref, rpcrdma_ep_destroy);
}
static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
@@ -388,14 +402,14 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep = kzalloc(sizeof(*ep), GFP_NOFS);
if (!ep)
- return -EAGAIN;
+ return -ENOTCONN;
ep->re_xprt = &r_xprt->rx_xprt;
kref_init(&ep->re_kref);
id = rpcrdma_create_id(r_xprt, ep);
if (IS_ERR(id)) {
- rc = PTR_ERR(id);
- goto out_free;
+ kfree(ep);
+ return PTR_ERR(id);
}
__module_get(THIS_MODULE);
device = id->device;
@@ -492,11 +506,8 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
return 0;
out_destroy:
- rpcrdma_ep_destroy(ep);
+ rpcrdma_ep_put(ep);
rdma_destroy_id(id);
-out_free:
- kfree(ep);
- r_xprt->rx_ep = NULL;
return rc;
}
@@ -512,22 +523,19 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ep *ep;
int rc;
-retry:
- rpcrdma_xprt_disconnect(r_xprt);
rc = rpcrdma_ep_create(r_xprt);
if (rc)
return rc;
ep = r_xprt->rx_ep;
- ep->re_connect_status = 0;
xprt_clear_connected(xprt);
-
rpcrdma_reset_cwnd(r_xprt);
- rpcrdma_post_recvs(r_xprt, true);
- rc = rpcrdma_sendctxs_create(r_xprt);
- if (rc)
- goto out;
+ /* Bump the ep's reference count while there are
+ * outstanding Receives.
+ */
+ rpcrdma_ep_get(ep);
+ rpcrdma_post_recvs(r_xprt, true);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
@@ -538,22 +546,24 @@ retry:
wait_event_interruptible(ep->re_connect_wait,
ep->re_connect_status != 0);
if (ep->re_connect_status <= 0) {
- if (ep->re_connect_status == -EAGAIN)
- goto retry;
rc = ep->re_connect_status;
goto out;
}
+ rc = rpcrdma_sendctxs_create(r_xprt);
+ if (rc) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+
rc = rpcrdma_reqs_setup(r_xprt);
if (rc) {
- rpcrdma_xprt_disconnect(r_xprt);
+ rc = -ENOTCONN;
goto out;
}
rpcrdma_mrs_create(r_xprt);
out:
- if (rc)
- ep->re_connect_status = rc;
trace_xprtrdma_connect(r_xprt, rc);
return rc;
}
@@ -587,7 +597,7 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt)
rpcrdma_mrs_destroy(r_xprt);
rpcrdma_sendctxs_destroy(r_xprt);
- if (rpcrdma_ep_destroy(ep))
+ if (rpcrdma_ep_put(ep))
rdma_destroy_id(id);
r_xprt->rx_ep = NULL;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 0a16fdb09b2c..43974ef39a50 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -82,6 +82,7 @@ struct rpcrdma_ep {
unsigned int re_max_inline_recv;
int re_async_rc;
int re_connect_status;
+ atomic_t re_force_disconnect;
struct ib_qp_init_attr re_attr;
wait_queue_head_t re_connect_wait;
struct rpc_xprt *re_xprt;
@@ -446,7 +447,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Endpoint calls - xprtrdma/verbs.c
*/
-void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc);
+void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 914508ea9b84..c57aef829403 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -496,8 +496,8 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
int flags, struct rpc_rqst *req)
{
struct xdr_buf *buf = &req->rq_private_buf;
- size_t want, uninitialized_var(read);
- ssize_t uninitialized_var(ret);
+ size_t want, read;
+ ssize_t ret;
xs_read_header(transport, buf);
@@ -844,7 +844,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
struct msghdr msg = {
.msg_flags = XS_SENDMSG_FLAGS,
};
- unsigned int uninitialized_var(sent);
+ unsigned int sent;
int status;
/* Close the stream if the previous transmission was incomplete */
@@ -915,7 +915,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
.msg_namelen = xprt->addrlen,
.msg_flags = XS_SENDMSG_FLAGS,
};
- unsigned int uninitialized_var(sent);
+ unsigned int sent;
int status;
xs_pktdump("packet data:",
@@ -999,7 +999,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
.msg_flags = XS_SENDMSG_FLAGS,
};
bool vm_wait = false;
- unsigned int uninitialized_var(sent);
+ unsigned int sent;
int status;
/* Close the stream if the previous transmission was incomplete */
diff --git a/net/switchdev/Kconfig b/net/switchdev/Kconfig
index 50f21a657007..18a2d980e11d 100644
--- a/net/switchdev/Kconfig
+++ b/net/switchdev/Kconfig
@@ -6,7 +6,7 @@
config NET_SWITCHDEV
bool "Switch (and switch-ish) device support"
depends on INET
- ---help---
+ help
This module provides glue between core networking code and device
drivers in order to support hardware switch chips in very generic
meaning of the word "switch". This include devices supporting L2/L3 but
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 716b61a701a8..9dd780215eef 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -6,7 +6,7 @@
menuconfig TIPC
tristate "The TIPC Protocol"
depends on INET
- ---help---
+ help
The Transparent Inter Process Communication (TIPC) protocol is
specially designed for intra cluster communication. This protocol
originates from Ericsson where it has been used in carrier grade
@@ -55,6 +55,6 @@ config TIPC_DIAG
tristate "TIPC: socket monitoring interface"
depends on TIPC
default y
- ---help---
+ help
Support for TIPC socket monitoring interface used by ss tool.
If unsure, say Y.
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 34ca7b789eba..e366ec9a7e4d 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -316,7 +316,6 @@ static int tipc_enable_bearer(struct net *net, const char *name,
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
b->priority = prio;
- test_and_set_bit_lock(0, &b->up);
refcount_set(&b->refcnt, 1);
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
@@ -326,6 +325,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
goto rejected;
}
+ test_and_set_bit_lock(0, &b->up);
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ee3b8d0576b8..d40f8e5b7683 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -827,11 +827,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
state |= l->bc_rcvlink->rcv_unacked;
state |= l->rcv_unacked;
state |= !skb_queue_empty(&l->transmq);
- state |= !skb_queue_empty(&l->deferdq);
probe = mstate->probing;
probe |= l->silent_intv_cnt;
if (probe || mstate->monitoring)
l->silent_intv_cnt++;
+ probe |= !skb_queue_empty(&l->deferdq);
if (l->snd_nxt == l->checkpoint) {
tipc_link_update_cwin(l, 0, 0);
probe = true;
@@ -921,6 +921,21 @@ static void link_prepare_wakeup(struct tipc_link *l)
}
+/**
+ * tipc_link_set_skb_retransmit_time - set the time at which retransmission of
+ * the given skb should be next attempted
+ * @skb: skb to set a future retransmission time for
+ * @l: link the skb will be transmitted on
+ */
+static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb,
+ struct tipc_link *l)
+{
+ if (link_is_bc_sndlink(l))
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ else
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+}
+
void tipc_link_reset(struct tipc_link *l)
{
struct sk_buff_head list;
@@ -1036,9 +1051,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
return -ENOBUFS;
}
__skb_queue_tail(transmq, skb);
- /* next retransmit attempt */
- if (link_is_bc_sndlink(l))
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ tipc_link_set_skb_retransmit_time(skb, l);
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
l->rcv_unacked = 0;
@@ -1139,9 +1152,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
if (unlikely(skb == l->backlog[imp].target_bskb))
l->backlog[imp].target_bskb = NULL;
__skb_queue_tail(&l->transmq, skb);
- /* next retransmit attempt */
- if (link_is_bc_sndlink(l))
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ tipc_link_set_skb_retransmit_time(skb, l);
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
@@ -1584,8 +1595,7 @@ release:
/* retransmit skb if unrestricted*/
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
- TIPC_SKB_CB(skb)->nxt_retr = (is_uc) ?
- TIPC_UC_RETR_TIME : TIPC_BC_RETR_LIM;
+ tipc_link_set_skb_retransmit_time(skb, l);
_skb = pskb_copy(skb, GFP_ATOMIC);
if (!_skb)
continue;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 046e4cb3acea..01b64869a173 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -238,14 +238,14 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
hdr = buf_msg(skb);
curr = msg_blocks(hdr);
mlen = msg_size(hdr);
- cpy = min_t(int, rem, mss - mlen);
+ cpy = min_t(size_t, rem, mss - mlen);
if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
return -EFAULT;
msg_set_size(hdr, mlen + cpy);
skb_put(skb, cpy);
rem -= cpy;
total += msg_blocks(hdr) - curr;
- } while (rem);
+ } while (rem > 0);
return total - accounted;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 26123f4177fd..a94f38333698 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1574,7 +1574,8 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
break;
send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
blocks = tsk->snd_backlog;
- if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) {
+ if (tsk->oneway++ >= tsk->nagle_start && maxnagle &&
+ send <= maxnagle) {
rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
if (unlikely(rc < 0))
break;
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index 61ec78521a60..fa0724fd84b4 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -11,7 +11,7 @@ config TLS
select STREAM_PARSER
select NET_SOCK_MSG
default n
- ---help---
+ help
Enable kernel support for TLS protocol. This allows symmetric
encryption handling of the TLS protocol to be done in-kernel.
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 24f64bc0de18..710bd44eaa49 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -670,7 +670,7 @@ static int tls_push_record(struct sock *sk, int flags,
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec = ctx->open_rec, *tmp = NULL;
- u32 i, split_point, uninitialized_var(orig_end);
+ u32 i, split_point, orig_end;
struct sk_msg *msg_pl, *msg_en;
struct aead_request *req;
bool split;
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index a23a5cca9753..b6c4282899ec 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -5,7 +5,7 @@
config UNIX
tristate "Unix domain sockets"
- ---help---
+ help
If you say Y here, you will include support for Unix domain sockets;
sockets are the standard Unix mechanism for establishing and
accessing network connections. Many commonly used programs such as
@@ -29,6 +29,6 @@ config UNIX_DIAG
tristate "UNIX: socket monitoring interface"
depends on UNIX
default n
- ---help---
+ help
Support for UNIX socket monitoring interface used by the ss tool.
If unsure, say Y.
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index dfbaf6bd8b1c..2700a63ab095 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -22,7 +22,7 @@
#include <net/af_vsock.h>
static struct workqueue_struct *virtio_vsock_workqueue;
-static struct virtio_vsock *the_virtio_vsock;
+static struct virtio_vsock __rcu *the_virtio_vsock;
static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
struct virtio_vsock {
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 813e93644ae7..faf74850a1b5 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -25,13 +25,13 @@ config CFG80211
# using a different algorithm, though right now they shouldn't
# (this is here rather than below to allow it to be a module)
select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS
- ---help---
+ help
cfg80211 is the Linux wireless LAN (802.11) configuration API.
Enable this if you have a wireless device.
For more information refer to documentation on the wireless wiki:
- http://wireless.kernel.org/en/developers/Documentation/cfg80211
+ https://wireless.wiki.kernel.org/en/developers/Documentation/cfg80211
When built as a module it will be called cfg80211.
@@ -71,7 +71,7 @@ config CFG80211_CERTIFICATION_ONUS
bool "cfg80211 certification onus"
depends on EXPERT
default n
- ---help---
+ help
You should disable this option unless you are both capable
and willing to ensure your system will remain regulatory
compliant with the features available under this option.
@@ -124,7 +124,7 @@ config CFG80211_EXTRA_REGDB_KEYDIR
config CFG80211_REG_CELLULAR_HINTS
bool "cfg80211 regulatory support for cellular base station hints"
depends on CFG80211_CERTIFICATION_ONUS
- ---help---
+ help
This option enables support for parsing regulatory hints
from cellular base stations. If enabled and at least one driver
claims support for parsing cellular base station hints the
@@ -137,7 +137,7 @@ config CFG80211_REG_CELLULAR_HINTS
config CFG80211_REG_RELAX_NO_IR
bool "cfg80211 support for NO_IR relaxation"
depends on CFG80211_CERTIFICATION_ONUS
- ---help---
+ help
This option enables support for relaxation of the NO_IR flag for
situations that certain regulatory bodies have provided clarifications
on how relaxation can occur. This feature has an inherent dependency on
@@ -171,7 +171,7 @@ config CFG80211_DEFAULT_PS
config CFG80211_DEBUGFS
bool "cfg80211 DebugFS entries"
depends on DEBUG_FS
- ---help---
+ help
You can enable this if you want debugfs entries for cfg80211.
If unsure, say N.
@@ -228,7 +228,7 @@ config LIB80211_DEBUG
bool "lib80211 debugging messages"
depends on LIB80211
default n
- ---help---
+ help
You can enable this if you want verbose debugging messages
from lib80211.
diff --git a/net/wireless/core.c b/net/wireless/core.c
index f0226ae9561c..c623d9bf5096 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -497,6 +497,8 @@ use_default_name:
INIT_WORK(&rdev->propagate_radar_detect_wk,
cfg80211_propagate_radar_detect_wk);
INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk);
+ INIT_WORK(&rdev->mgmt_registrations_update_wk,
+ cfg80211_mgmt_registrations_update_wk);
#ifdef CONFIG_CFG80211_DEFAULT_PS
rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -1047,6 +1049,7 @@ void wiphy_unregister(struct wiphy *wiphy)
flush_work(&rdev->sched_scan_stop_wk);
flush_work(&rdev->propagate_radar_detect_wk);
flush_work(&rdev->propagate_cac_done_wk);
+ flush_work(&rdev->mgmt_registrations_update_wk);
#ifdef CONFIG_PM
if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
@@ -1108,7 +1111,6 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
rdev->devlist_generation++;
cfg80211_mlme_purge_registrations(wdev);
- flush_work(&wdev->mgmt_registrations_update_wk);
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_DEVICE:
@@ -1253,8 +1255,6 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev,
spin_lock_init(&wdev->event_lock);
INIT_LIST_HEAD(&wdev->mgmt_registrations);
spin_lock_init(&wdev->mgmt_registrations_lock);
- INIT_WORK(&wdev->mgmt_registrations_update_wk,
- cfg80211_mgmt_registrations_update_wk);
INIT_LIST_HEAD(&wdev->pmsr_list);
spin_lock_init(&wdev->pmsr_lock);
INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index e0e5b3ee9699..67b0389fca4d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -99,6 +99,8 @@ struct cfg80211_registered_device {
struct cfg80211_chan_def cac_done_chandef;
struct work_struct propagate_cac_done_wk;
+ struct work_struct mgmt_registrations_update_wk;
+
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
struct wiphy wiphy __aligned(NETDEV_ALIGN);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 189334314cba..a6c61a2e6569 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -440,9 +440,15 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
ASSERT_RTNL();
+ spin_lock_bh(&wdev->mgmt_registrations_lock);
+ if (!wdev->mgmt_registrations_need_update) {
+ spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ return;
+ }
+
rcu_read_lock();
list_for_each_entry_rcu(tmp, &rdev->wiphy.wdev_list, list) {
- list_for_each_entry_rcu(reg, &tmp->mgmt_registrations, list) {
+ list_for_each_entry(reg, &tmp->mgmt_registrations, list) {
u32 mask = BIT(le16_to_cpu(reg->frame_type) >> 4);
u32 mcast_mask = 0;
@@ -460,16 +466,23 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
}
rcu_read_unlock();
+ wdev->mgmt_registrations_need_update = 0;
+ spin_unlock_bh(&wdev->mgmt_registrations_lock);
+
rdev_update_mgmt_frame_registrations(rdev, wdev, &upd);
}
void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk)
{
- struct wireless_dev *wdev = container_of(wk, struct wireless_dev,
- mgmt_registrations_update_wk);
+ struct cfg80211_registered_device *rdev;
+ struct wireless_dev *wdev;
+
+ rdev = container_of(wk, struct cfg80211_registered_device,
+ mgmt_registrations_update_wk);
rtnl_lock();
- cfg80211_mgmt_registrations_update(wdev);
+ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list)
+ cfg80211_mgmt_registrations_update(wdev);
rtnl_unlock();
}
@@ -557,6 +570,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
nreg->multicast_rx = multicast_rx;
list_add(&nreg->list, &wdev->mgmt_registrations);
}
+ wdev->mgmt_registrations_need_update = 1;
spin_unlock_bh(&wdev->mgmt_registrations_lock);
cfg80211_mgmt_registrations_update(wdev);
@@ -585,7 +599,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
list_del(&reg->list);
kfree(reg);
- schedule_work(&wdev->mgmt_registrations_update_wk);
+ wdev->mgmt_registrations_need_update = 1;
+ schedule_work(&rdev->mgmt_registrations_update_wk);
}
spin_unlock_bh(&wdev->mgmt_registrations_lock);
@@ -608,6 +623,7 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
list_del(&reg->list);
kfree(reg);
}
+ wdev->mgmt_registrations_need_update = 1;
spin_unlock_bh(&wdev->mgmt_registrations_lock);
cfg80211_mgmt_registrations_update(wdev);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 263ae395ad44..7fbca0854265 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5016,7 +5016,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_he_obss_pd(
info->attrs[NL80211_ATTR_HE_OBSS_PD],
&params.he_obss_pd);
- goto out;
+ if (err)
+ goto out;
}
if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) {
@@ -5024,7 +5025,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL80211_ATTR_HE_BSS_COLOR],
&params.he_bss_color);
if (err)
- return err;
+ goto out;
}
nl80211_calculate_ap_params(&params);
@@ -13265,13 +13266,13 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
if (!wdev_running(wdev))
return -ENETDOWN;
}
-
- if (!vcmd->doit)
- return -EOPNOTSUPP;
} else {
wdev = NULL;
}
+ if (!vcmd->doit)
+ return -EOPNOTSUPP;
+
if (info->attrs[NL80211_ATTR_VENDOR_DATA]) {
data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]);
len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]);
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 9f0d58b0b90b..e3ed23245a82 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -5,7 +5,7 @@
config X25
tristate "CCITT X.25 Packet Layer"
- ---help---
+ help
X.25 is a set of standardized network protocols, similar in scope to
frame relay; the one physical line from your box to the X.25 network
entry point can carry several logical point-to-point connections
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b6c0f08bd80d..3700266229f6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -352,10 +352,8 @@ static int xsk_generic_xmit(struct sock *sk)
len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
- if (unlikely(!skb)) {
- err = -EAGAIN;
+ if (unlikely(!skb))
goto out;
- }
skb_put(skb, len);
addr = desc.addr;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 540ed75e4482..08b80669f649 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -2,9 +2,6 @@
#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
-#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
-#include <linux/swiotlb.h>
#include "xsk_queue.h"
@@ -55,7 +52,6 @@ struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
pool->free_heads_cnt = chunks;
pool->headroom = headroom;
pool->chunk_size = chunk_size;
- pool->cheap_dma = true;
pool->unaligned = unaligned;
pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM;
INIT_LIST_HEAD(&pool->free_list);
@@ -125,48 +121,6 @@ static void xp_check_dma_contiguity(struct xsk_buff_pool *pool)
}
}
-static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool)
-{
-#if defined(CONFIG_SWIOTLB)
- phys_addr_t paddr;
- u32 i;
-
- for (i = 0; i < pool->dma_pages_cnt; i++) {
- paddr = dma_to_phys(pool->dev, pool->dma_pages[i]);
- if (is_swiotlb_buffer(paddr))
- return false;
- }
-#endif
- return true;
-}
-
-static bool xp_check_cheap_dma(struct xsk_buff_pool *pool)
-{
-#if defined(CONFIG_HAS_DMA)
- const struct dma_map_ops *ops = get_dma_ops(pool->dev);
-
- if (ops) {
- return !ops->sync_single_for_cpu &&
- !ops->sync_single_for_device;
- }
-
- if (!dma_is_direct(ops))
- return false;
-
- if (!xp_check_swiotlb_dma(pool))
- return false;
-
- if (!dev_is_dma_coherent(pool->dev)) {
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
- defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
- defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE)
- return false;
-#endif
- }
-#endif
- return true;
-}
-
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
unsigned long attrs, struct page **pages, u32 nr_pages)
{
@@ -180,6 +134,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
pool->dev = dev;
pool->dma_pages_cnt = nr_pages;
+ pool->dma_need_sync = false;
for (i = 0; i < pool->dma_pages_cnt; i++) {
dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
@@ -188,14 +143,13 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
xp_dma_unmap(pool, attrs);
return -ENOMEM;
}
+ if (dma_need_sync(dev, dma))
+ pool->dma_need_sync = true;
pool->dma_pages[i] = dma;
}
if (pool->unaligned)
xp_check_dma_contiguity(pool);
-
- pool->dev = dev;
- pool->cheap_dma = xp_check_cheap_dma(pool);
return 0;
}
EXPORT_SYMBOL(xp_dma_map);
@@ -280,7 +234,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
xskb->xdp.data_meta = xskb->xdp.data;
- if (!pool->cheap_dma) {
+ if (pool->dma_need_sync) {
dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
pool->frame_len,
DMA_BIDIRECTIONAL);
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index b7fd9c838416..5b9a5ab48111 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -22,7 +22,7 @@ if INET
config XFRM_USER
tristate "Transformation user configuration interface"
select XFRM_ALGO
- ---help---
+ help
Support for Transformation(XFRM) user configuration interface
like IPsec used by native Linux tools.
@@ -31,7 +31,7 @@ config XFRM_USER
config XFRM_INTERFACE
tristate "Transformation virtual interface"
depends on XFRM && IPV6
- ---help---
+ help
This provides a virtual interface to route IPsec traffic.
If unsure, say N.
@@ -39,7 +39,7 @@ config XFRM_INTERFACE
config XFRM_SUB_POLICY
bool "Transformation sub policy support"
depends on XFRM
- ---help---
+ help
Support sub policy for developers. By using sub policy with main
one, two policies can be applied to the same packet at once.
Policy which lives shorter time in kernel should be a sub.
@@ -49,7 +49,7 @@ config XFRM_SUB_POLICY
config XFRM_MIGRATE
bool "Transformation migrate database"
depends on XFRM
- ---help---
+ help
A feature to update locator(s) of a given IPsec security
association dynamically. This feature is required, for
instance, in a Mobile IPv6 environment with IPsec configuration
@@ -60,13 +60,37 @@ config XFRM_MIGRATE
config XFRM_STATISTICS
bool "Transformation statistics"
depends on XFRM && PROC_FS
- ---help---
+ help
This statistics is not a SNMP/MIB specification but shows
statistics about transformation error (or almost error) factor
at packet processing for developer.
If unsure, say N.
+# This option selects XFRM_ALGO along with the AH authentication algorithms that
+# RFC 8221 lists as MUST be implemented.
+config XFRM_AH
+ tristate
+ select XFRM_ALGO
+ select CRYPTO
+ select CRYPTO_HMAC
+ select CRYPTO_SHA256
+
+# This option selects XFRM_ALGO along with the ESP encryption and authentication
+# algorithms that RFC 8221 lists as MUST be implemented.
+config XFRM_ESP
+ tristate
+ select XFRM_ALGO
+ select CRYPTO
+ select CRYPTO_AES
+ select CRYPTO_AUTHENC
+ select CRYPTO_CBC
+ select CRYPTO_ECHAINIV
+ select CRYPTO_GCM
+ select CRYPTO_HMAC
+ select CRYPTO_SEQIV
+ select CRYPTO_SHA256
+
config XFRM_IPCOMP
tristate
select XFRM_ALGO
@@ -76,7 +100,7 @@ config XFRM_IPCOMP
config NET_KEY
tristate "PF_KEY sockets"
select XFRM_ALGO
- ---help---
+ help
PF_KEYv2 socket family, compatible to KAME ones.
They are required if you are going to use IPsec tools ported
from KAME.
@@ -87,7 +111,7 @@ config NET_KEY_MIGRATE
bool "PF_KEY MIGRATE"
depends on NET_KEY
select XFRM_MIGRATE
- ---help---
+ help
Add a PF_KEY MIGRATE message to PF_KEYv2 socket family.
The PF_KEY MIGRATE message is used to dynamically update
locator(s) of a given IPsec security association.
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 100e29682b48..827ccdf2db57 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -15,6 +15,7 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
{
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf ||
!sk_rmem_schedule(sk, skb, skb->truesize)) {
+ XFRM_INC_STATS(sock_net(sk), LINUX_MIB_XFRMINERROR);
kfree_skb(skb);
return;
}
@@ -49,23 +50,51 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb)
struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx,
strp);
struct strp_msg *rxm = strp_msg(skb);
+ int len = rxm->full_len - 2;
u32 nonesp_marker;
int err;
+ /* keepalive packet? */
+ if (unlikely(len == 1)) {
+ u8 data;
+
+ err = skb_copy_bits(skb, rxm->offset + 2, &data, 1);
+ if (err < 0) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
+ kfree_skb(skb);
+ return;
+ }
+
+ if (data == 0xff) {
+ kfree_skb(skb);
+ return;
+ }
+ }
+
+ /* drop other short messages */
+ if (unlikely(len <= sizeof(nonesp_marker))) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
+ kfree_skb(skb);
+ return;
+ }
+
err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker,
sizeof(nonesp_marker));
if (err < 0) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
kfree_skb(skb);
return;
}
/* remove header, leave non-ESP marker/SPI */
if (!__pskb_pull(skb, rxm->offset + 2)) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
kfree_skb(skb);
return;
}
if (pskb_trim(skb, rxm->full_len - 2) != 0) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
kfree_skb(skb);
return;
}
@@ -91,7 +120,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
return err;
len = be16_to_cpu(blen);
- if (len < 6)
+ if (len < 2)
return -EINVAL;
return len;
@@ -109,8 +138,11 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
flags |= nonblock ? MSG_DONTWAIT : 0;
skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err);
- if (!skb)
+ if (!skb) {
+ if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN)
+ return 0;
return err;
+ }
copied = len;
if (copied > skb->len)
@@ -213,7 +245,7 @@ retry:
return 0;
}
-static int espintcp_push_msgs(struct sock *sk)
+static int espintcp_push_msgs(struct sock *sk, int flags)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
struct espintcp_msg *emsg = &ctx->partial;
@@ -227,12 +259,12 @@ static int espintcp_push_msgs(struct sock *sk)
ctx->tx_running = 1;
if (emsg->skb)
- err = espintcp_sendskb_locked(sk, emsg, 0);
+ err = espintcp_sendskb_locked(sk, emsg, flags);
else
- err = espintcp_sendskmsg_locked(sk, emsg, 0);
+ err = espintcp_sendskmsg_locked(sk, emsg, flags);
if (err == -EAGAIN) {
ctx->tx_running = 0;
- return 0;
+ return flags & MSG_DONTWAIT ? -EAGAIN : 0;
}
if (!err)
memset(emsg, 0, sizeof(*emsg));
@@ -257,7 +289,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
offset = skb_transport_offset(skb);
len = skb->len - offset;
- espintcp_push_msgs(sk);
+ espintcp_push_msgs(sk, 0);
if (emsg->len) {
kfree_skb(skb);
@@ -270,7 +302,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
emsg->len = len;
emsg->skb = skb;
- espintcp_push_msgs(sk);
+ espintcp_push_msgs(sk, 0);
return 0;
}
@@ -287,7 +319,7 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
char buf[2] = {0};
int err, end;
- if (msg->msg_flags)
+ if (msg->msg_flags & ~MSG_DONTWAIT)
return -EOPNOTSUPP;
if (size > MAX_ESPINTCP_MSG)
@@ -298,9 +330,10 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
lock_sock(sk);
- err = espintcp_push_msgs(sk);
+ err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
if (err < 0) {
- err = -ENOBUFS;
+ if (err != -EAGAIN || !(msg->msg_flags & MSG_DONTWAIT))
+ err = -ENOBUFS;
goto unlock;
}
@@ -337,10 +370,9 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
tcp_rate_check_app_limited(sk);
- err = espintcp_push_msgs(sk);
+ err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
/* this message could be partially sent, keep it */
- if (err < 0)
- goto unlock;
+
release_sock(sk);
return size;
@@ -374,7 +406,7 @@ static void espintcp_tx_work(struct work_struct *work)
lock_sock(sk);
if (!ctx->tx_running)
- espintcp_push_msgs(sk);
+ espintcp_push_msgs(sk, 0);
release_sock(sk);
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index f50d1f97cf8e..626096bd0d29 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -108,7 +108,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
- if (!xo)
+ if (!xo || (xo->flags & XFRM_XMIT))
return skb;
if (!(features & NETIF_F_HW_ESP))
@@ -129,6 +129,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
return skb;
}
+ xo->flags |= XFRM_XMIT;
+
if (skb_is_gso(skb)) {
struct net_device *dev = skb->dev;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index c407ecbc5d46..b615729812e5 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -37,6 +37,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/ip_tunnels.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
@@ -581,6 +582,7 @@ static const struct net_device_ops xfrmi_netdev_ops = {
static void xfrmi_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &xfrmi_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_NONE;
dev->mtu = ETH_DATA_LEN;
dev->min_mtu = ETH_MIN_MTU;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e4c23f69f69f..a7ab19353313 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -574,16 +574,12 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
switch (x->outer_mode.family) {
case AF_INET:
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-#ifdef CONFIG_NETFILTER
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-#endif
break;
case AF_INET6:
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
break;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 564aa6492e7c..19c5e0fa3f44 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,7 +39,7 @@
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
-#ifdef CONFIG_INET_ESPINTCP
+#ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h>
#endif
@@ -1433,14 +1433,10 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_unlock_bh(&pq->hold_queue.lock);
}
-static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
- struct xfrm_policy *pol)
+static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark,
+ struct xfrm_policy *pol)
{
- if (policy->mark.v == pol->mark.v &&
- policy->priority == pol->priority)
- return true;
-
- return false;
+ return mark->v == pol->mark.v && mark->m == pol->mark.m;
}
static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
@@ -1503,7 +1499,7 @@ static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(policy, pol) &&
+ xfrm_policy_mark_match(&policy->mark, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
delpol = pol;
@@ -1538,7 +1534,7 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(policy, pol) &&
+ xfrm_policy_mark_match(&policy->mark, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
if (excl)
@@ -1610,9 +1606,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
EXPORT_SYMBOL(xfrm_policy_insert);
static struct xfrm_policy *
-__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
- u8 type, int dir,
- struct xfrm_selector *sel,
+__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark,
+ u32 if_id, u8 type, int dir, struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx)
{
struct xfrm_policy *pol;
@@ -1623,7 +1618,7 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == type &&
pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v &&
+ xfrm_policy_mark_match(mark, pol) &&
!selector_cmp(sel, &pol->selector) &&
xfrm_sec_ctx_match(ctx, pol->security))
return pol;
@@ -1632,11 +1627,10 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
return NULL;
}
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir,
- struct xfrm_selector *sel,
- struct xfrm_sec_ctx *ctx, int delete,
- int *err)
+struct xfrm_policy *
+xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, struct xfrm_selector *sel,
+ struct xfrm_sec_ctx *ctx, int delete, int *err)
{
struct xfrm_pol_inexact_bin *bin = NULL;
struct xfrm_policy *pol, *ret = NULL;
@@ -1703,9 +1697,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir, u32 id, int delete,
- int *err)
+struct xfrm_policy *
+xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, u32 id, int delete, int *err)
{
struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
@@ -1720,8 +1714,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
ret = NULL;
hlist_for_each_entry(pol, chain, byidx) {
if (pol->type == type && pol->index == id &&
- pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v) {
+ pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) {
xfrm_pol_hold(pol);
if (delete) {
*err = security_xfrm_policy_delete(
@@ -4156,7 +4149,7 @@ void __init xfrm_init(void)
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
-#ifdef CONFIG_INET_ESPINTCP
+#ifdef CONFIG_XFRM_ESPINTCP
espintcp_init();
#endif
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e6cfaa680ef3..fbb7d9d06478 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1863,7 +1863,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct km_event c;
int delete;
struct xfrm_mark m;
- u32 mark = xfrm_mark_get(attrs, &m);
u32 if_id = 0;
p = nlmsg_data(nlh);
@@ -1880,8 +1879,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ xfrm_mark_get(attrs, &m);
+
if (p->index)
- xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err);
+ xp = xfrm_policy_byid(net, &m, if_id, type, p->dir,
+ p->index, delete, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@@ -1898,8 +1900,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
}
- xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel,
- ctx, delete, &err);
+ xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir,
+ &p->sel, ctx, delete, &err);
security_xfrm_policy_free(ctx);
}
if (xp == NULL)
@@ -2166,7 +2168,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
u8 type = XFRM_POLICY_TYPE_MAIN;
int err = -ENOENT;
struct xfrm_mark m;
- u32 mark = xfrm_mark_get(attrs, &m);
u32 if_id = 0;
err = copy_from_user_policy_type(&type, attrs);
@@ -2180,8 +2181,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ xfrm_mark_get(attrs, &m);
+
if (p->index)
- xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err);
+ xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index,
+ 0, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@@ -2198,7 +2202,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
}
- xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir,
+ xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir,
&p->sel, ctx, 0, &err);
security_xfrm_policy_free(ctx);
}