Diffstat (limited to 'net')
-rw-r--r--  net/802/fc.c | 2
-rw-r--r--  net/802/fddi.c | 11
-rw-r--r--  net/802/hippi.c | 16
-rw-r--r--  net/8021q/vlan.c | 11
-rw-r--r--  net/8021q/vlan.h | 2
-rw-r--r--  net/8021q/vlan_dev.c | 3
-rw-r--r--  net/Kconfig | 12
-rw-r--r--  net/atm/br2684.c | 6
-rw-r--r--  net/atm/common.c | 2
-rw-r--r--  net/atm/lec.c | 19
-rw-r--r--  net/atm/mpoa_caches.c | 43
-rw-r--r--  net/ax25/af_ax25.c | 2
-rw-r--r--  net/ax25/ax25_addr.c | 2
-rw-r--r--  net/ax25/ax25_dev.c | 2
-rw-r--r--  net/ax25/ax25_ds_in.c | 2
-rw-r--r--  net/ax25/ax25_ds_subr.c | 2
-rw-r--r--  net/ax25/ax25_ds_timer.c | 2
-rw-r--r--  net/ax25/ax25_iface.c | 2
-rw-r--r--  net/ax25/ax25_in.c | 2
-rw-r--r--  net/ax25/ax25_ip.c | 2
-rw-r--r--  net/ax25/ax25_out.c | 2
-rw-r--r--  net/ax25/ax25_route.c | 2
-rw-r--r--  net/ax25/ax25_std_in.c | 2
-rw-r--r--  net/ax25/ax25_std_subr.c | 2
-rw-r--r--  net/ax25/ax25_std_timer.c | 2
-rw-r--r--  net/ax25/ax25_subr.c | 4
-rw-r--r--  net/ax25/ax25_timer.c | 2
-rw-r--r--  net/ax25/ax25_uid.c | 2
-rw-r--r--  net/batman-adv/Kconfig | 2
-rw-r--r--  net/batman-adv/bat_iv_ogm.c | 58
-rw-r--r--  net/batman-adv/bat_v.c | 8
-rw-r--r--  net/batman-adv/bat_v_elp.c | 71
-rw-r--r--  net/batman-adv/bat_v_ogm.c | 75
-rw-r--r--  net/batman-adv/debugfs.c | 26
-rw-r--r--  net/batman-adv/distributed-arp-table.c | 84
-rw-r--r--  net/batman-adv/fragmentation.c | 82
-rw-r--r--  net/batman-adv/fragmentation.h | 2
-rw-r--r--  net/batman-adv/gateway_client.c | 9
-rw-r--r--  net/batman-adv/hard-interface.c | 225
-rw-r--r--  net/batman-adv/hard-interface.h | 21
-rw-r--r--  net/batman-adv/hash.h | 30
-rw-r--r--  net/batman-adv/icmp_socket.c | 5
-rw-r--r--  net/batman-adv/log.c | 4
-rw-r--r--  net/batman-adv/log.h | 12
-rw-r--r--  net/batman-adv/main.c | 16
-rw-r--r--  net/batman-adv/main.h | 28
-rw-r--r--  net/batman-adv/multicast.c | 70
-rw-r--r--  net/batman-adv/multicast.h | 6
-rw-r--r--  net/batman-adv/netlink.c | 31
-rw-r--r--  net/batman-adv/network-coding.c | 43
-rw-r--r--  net/batman-adv/originator.c | 23
-rw-r--r--  net/batman-adv/packet.h | 12
-rw-r--r--  net/batman-adv/routing.c | 180
-rw-r--r--  net/batman-adv/send.c | 419
-rw-r--r--  net/batman-adv/send.h | 11
-rw-r--r--  net/batman-adv/soft-interface.c | 27
-rw-r--r--  net/batman-adv/sysfs.c | 53
-rw-r--r--  net/batman-adv/tp_meter.c | 6
-rw-r--r--  net/batman-adv/translation-table.c | 39
-rw-r--r--  net/batman-adv/tvlv.c | 5
-rw-r--r--  net/batman-adv/types.h | 45
-rw-r--r--  net/bluetooth/Makefile | 2
-rw-r--r--  net/bluetooth/bnep/netdev.c | 3
-rw-r--r--  net/bluetooth/l2cap_core.c | 6
-rw-r--r--  net/bluetooth/smp.c | 85
-rw-r--r--  net/bluetooth/smp.h | 1
-rw-r--r--  net/bridge/br_device.c | 7
-rw-r--r--  net/bridge/br_fdb.c | 10
-rw-r--r--  net/bridge/br_ioctl.c | 2
-rw-r--r--  net/bridge/br_multicast.c | 188
-rw-r--r--  net/bridge/br_netfilter_hooks.c | 30
-rw-r--r--  net/bridge/br_netfilter_ipv6.c | 2
-rw-r--r--  net/bridge/br_netlink.c | 67
-rw-r--r--  net/bridge/br_private.h | 11
-rw-r--r--  net/bridge/br_private_stp.h | 1
-rw-r--r--  net/bridge/br_stp.c | 65
-rw-r--r--  net/bridge/br_stp_if.c | 14
-rw-r--r--  net/bridge/br_stp_timer.c | 2
-rw-r--r--  net/bridge/br_sysfs_br.c | 40
-rw-r--r--  net/bridge/netfilter/Kconfig | 1
-rw-r--r--  net/bridge/netfilter/ebt_arpreply.c | 3
-rw-r--r--  net/bridge/netfilter/ebt_log.c | 11
-rw-r--r--  net/bridge/netfilter/ebt_nflog.c | 6
-rw-r--r--  net/bridge/netfilter/ebt_redirect.c | 6
-rw-r--r--  net/bridge/netfilter/ebtable_broute.c | 2
-rw-r--r--  net/bridge/netfilter/ebtables.c | 8
-rw-r--r--  net/bridge/netfilter/nf_log_bridge.c | 17
-rw-r--r--  net/bridge/netfilter/nft_meta_bridge.c | 2
-rw-r--r--  net/bridge/netfilter/nft_reject_bridge.c | 30
-rw-r--r--  net/caif/caif_dev.c | 2
-rw-r--r--  net/caif/cfcnfg.c | 9
-rw-r--r--  net/can/af_can.c | 12
-rw-r--r--  net/can/af_can.h | 3
-rw-r--r--  net/can/bcm.c | 59
-rw-r--r--  net/can/gw.c | 4
-rw-r--r--  net/can/raw.c | 4
-rw-r--r--  net/ceph/auth.c | 4
-rw-r--r--  net/ceph/auth_x.c | 197
-rw-r--r--  net/ceph/auth_x.h | 3
-rw-r--r--  net/ceph/crush/mapper.c | 2
-rw-r--r--  net/ceph/crypto.c | 463
-rw-r--r--  net/ceph/crypto.h | 26
-rw-r--r--  net/ceph/messenger.c | 19
-rw-r--r--  net/ceph/mon_client.c | 12
-rw-r--r--  net/ceph/osd_client.c | 39
-rw-r--r--  net/compat.c | 2
-rw-r--r--  net/core/Makefile | 1
-rw-r--r--  net/core/datagram.c | 74
-rw-r--r--  net/core/dev.c | 900
-rw-r--r--  net/core/devlink.c | 100
-rw-r--r--  net/core/drop_monitor.c | 60
-rw-r--r--  net/core/ethtool.c | 99
-rw-r--r--  net/core/fib_rules.c | 78
-rw-r--r--  net/core/filter.c | 338
-rw-r--r--  net/core/flow.c | 60
-rw-r--r--  net/core/flow_dissector.c | 38
-rw-r--r--  net/core/gen_estimator.c | 296
-rw-r--r--  net/core/gen_stats.c | 20
-rw-r--r--  net/core/lwt_bpf.c | 397
-rw-r--r--  net/core/lwtunnel.c | 83
-rw-r--r--  net/core/neighbour.c | 16
-rw-r--r--  net/core/net-sysfs.c | 65
-rw-r--r--  net/core/net_namespace.c | 77
-rw-r--r--  net/core/netpoll.c | 6
-rw-r--r--  net/core/pktgen.c | 2
-rw-r--r--  net/core/rtnetlink.c | 28
-rw-r--r--  net/core/scm.c | 2
-rw-r--r--  net/core/secure_seq.c | 11
-rw-r--r--  net/core/skbuff.c | 82
-rw-r--r--  net/core/sock.c | 92
-rw-r--r--  net/core/stream.c | 28
-rw-r--r--  net/core/sysctl_net_core.c | 5
-rw-r--r--  net/core/utils.c | 2
-rw-r--r--  net/dccp/ipv4.c | 8
-rw-r--r--  net/dccp/ipv6.c | 6
-rw-r--r--  net/decnet/af_decnet.c | 16
-rw-r--r--  net/decnet/dn_dev.c | 4
-rw-r--r--  net/decnet/dn_fib.c | 2
-rw-r--r--  net/decnet/dn_table.c | 2
-rw-r--r--  net/decnet/sysctl_net_decnet.c | 2
-rw-r--r--  net/dsa/dsa2.c | 11
-rw-r--r--  net/dsa/slave.c | 24
-rw-r--r--  net/ethernet/eth.c | 8
-rw-r--r--  net/hsr/hsr_device.c | 1
-rw-r--r--  net/hsr/hsr_netlink.c | 23
-rw-r--r--  net/ieee802154/6lowpan/6lowpan_i.h | 2
-rw-r--r--  net/ieee802154/Makefile | 2
-rw-r--r--  net/ieee802154/netlink.c | 24
-rw-r--r--  net/ieee802154/nl-phy.c | 6
-rw-r--r--  net/ieee802154/nl802154.c | 44
-rw-r--r--  net/ipv4/Kconfig | 8
-rw-r--r--  net/ipv4/Makefile | 1
-rw-r--r--  net/ipv4/af_inet.c | 14
-rw-r--r--  net/ipv4/devinet.c | 2
-rw-r--r--  net/ipv4/fib_frontend.c | 15
-rw-r--r--  net/ipv4/fib_semantics.c | 23
-rw-r--r--  net/ipv4/fib_trie.c | 157
-rw-r--r--  net/ipv4/fou.c | 23
-rw-r--r--  net/ipv4/icmp.c | 8
-rw-r--r--  net/ipv4/igmp.c | 9
-rw-r--r--  net/ipv4/inet_connection_sock.c | 20
-rw-r--r--  net/ipv4/inet_diag.c | 73
-rw-r--r--  net/ipv4/ip_gre.c | 6
-rw-r--r--  net/ipv4/ip_options.c | 2
-rw-r--r--  net/ipv4/ip_output.c | 40
-rw-r--r--  net/ipv4/ip_sockglue.c | 44
-rw-r--r--  net/ipv4/ip_tunnel.c | 10
-rw-r--r--  net/ipv4/ip_tunnel_core.c | 2
-rw-r--r--  net/ipv4/ip_vti.c | 2
-rw-r--r--  net/ipv4/ipconfig.c | 2
-rw-r--r--  net/ipv4/ipip.c | 4
-rw-r--r--  net/ipv4/ipmr.c | 17
-rw-r--r--  net/ipv4/netfilter/Kconfig | 14
-rw-r--r--  net/ipv4/netfilter/Makefile | 3
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 44
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 44
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 43
-rw-r--r--  net/ipv4/netfilter/ipt_MASQUERADE.c | 11
-rw-r--r--  net/ipv4/netfilter/ipt_REJECT.c | 4
-rw-r--r--  net/ipv4/netfilter/ipt_SYNPROXY.c | 8
-rw-r--r--  net/ipv4/netfilter/ipt_rpfilter.c | 18
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 145
-rw-r--r--  net/ipv4/netfilter/nf_defrag_ipv4.c | 41
-rw-r--r--  net/ipv4/netfilter/nf_reject_ipv4.c | 2
-rw-r--r--  net/ipv4/netfilter/nf_socket_ipv4.c | 163
-rw-r--r--  net/ipv4/netfilter/nft_dup_ipv4.c | 2
-rw-r--r--  net/ipv4/netfilter/nft_fib_ipv4.c | 236
-rw-r--r--  net/ipv4/netfilter/nft_masq_ipv4.c | 15
-rw-r--r--  net/ipv4/netfilter/nft_redir_ipv4.c | 14
-rw-r--r--  net/ipv4/netfilter/nft_reject_ipv4.c | 4
-rw-r--r--  net/ipv4/ping.c | 11
-rw-r--r--  net/ipv4/raw.c | 35
-rw-r--r--  net/ipv4/raw_diag.c | 266
-rw-r--r--  net/ipv4/route.c | 124
-rw-r--r--  net/ipv4/syncookies.c | 3
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 16
-rw-r--r--  net/ipv4/tcp.c | 118
-rw-r--r--  net/ipv4/tcp_bbr.c | 32
-rw-r--r--  net/ipv4/tcp_cong.c | 14
-rw-r--r--  net/ipv4/tcp_dctcp.c | 1
-rw-r--r--  net/ipv4/tcp_fastopen.c | 3
-rw-r--r--  net/ipv4/tcp_highspeed.c | 11
-rw-r--r--  net/ipv4/tcp_hybla.c | 1
-rw-r--r--  net/ipv4/tcp_illinois.c | 10
-rw-r--r--  net/ipv4/tcp_input.c | 35
-rw-r--r--  net/ipv4/tcp_ipv4.c | 30
-rw-r--r--  net/ipv4/tcp_lp.c | 1
-rw-r--r--  net/ipv4/tcp_metrics.c | 24
-rw-r--r--  net/ipv4/tcp_minisocks.c | 4
-rw-r--r--  net/ipv4/tcp_output.c | 196
-rw-r--r--  net/ipv4/tcp_scalable.c | 15
-rw-r--r--  net/ipv4/tcp_timer.c | 4
-rw-r--r--  net/ipv4/tcp_vegas.c | 1
-rw-r--r--  net/ipv4/tcp_veno.c | 10
-rw-r--r--  net/ipv4/tcp_westwood.c | 1
-rw-r--r--  net/ipv4/tcp_yeah.c | 10
-rw-r--r--  net/ipv4/udp.c | 256
-rw-r--r--  net/ipv4/udplite.c | 3
-rw-r--r--  net/ipv6/Kconfig | 35
-rw-r--r--  net/ipv6/Makefile | 4
-rw-r--r--  net/ipv6/addrconf.c | 54
-rw-r--r--  net/ipv6/af_inet6.c | 20
-rw-r--r--  net/ipv6/ah6.c | 5
-rw-r--r--  net/ipv6/datagram.c | 10
-rw-r--r--  net/ipv6/esp6.c | 5
-rw-r--r--  net/ipv6/exthdrs.c | 270
-rw-r--r--  net/ipv6/icmp.c | 9
-rw-r--r--  net/ipv6/ila/ila_lwt.c | 93
-rw-r--r--  net/ipv6/ila/ila_xlat.c | 43
-rw-r--r--  net/ipv6/inet6_connection_sock.c | 11
-rw-r--r--  net/ipv6/ip6_flowlabel.c | 2
-rw-r--r--  net/ipv6/ip6_gre.c | 9
-rw-r--r--  net/ipv6/ip6_offload.c | 1
-rw-r--r--  net/ipv6/ip6_output.c | 24
-rw-r--r--  net/ipv6/ip6_tunnel.c | 54
-rw-r--r--  net/ipv6/ip6_vti.c | 30
-rw-r--r--  net/ipv6/ip6mr.c | 4
-rw-r--r--  net/ipv6/ipcomp6.c | 5
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 20
-rw-r--r--  net/ipv6/mcast.c | 51
-rw-r--r--  net/ipv6/mip6.c | 2
-rw-r--r--  net/ipv6/ndisc.c | 29
-rw-r--r--  net/ipv6/netfilter.c | 1
-rw-r--r--  net/ipv6/netfilter/Kconfig | 14
-rw-r--r--  net/ipv6/netfilter/Makefile | 3
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 45
-rw-r--r--  net/ipv6/netfilter/ip6t_MASQUERADE.c | 2
-rw-r--r--  net/ipv6/netfilter/ip6t_REJECT.c | 23
-rw-r--r--  net/ipv6/netfilter/ip6t_SYNPROXY.c | 8
-rw-r--r--  net/ipv6/netfilter/ip6t_rpfilter.c | 11
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 146
-rw-r--r--  net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 42
-rw-r--r--  net/ipv6/netfilter/nf_reject_ipv6.c | 3
-rw-r--r--  net/ipv6/netfilter/nf_socket_ipv6.c | 151
-rw-r--r--  net/ipv6/netfilter/nft_dup_ipv6.c | 2
-rw-r--r--  net/ipv6/netfilter/nft_fib_ipv6.c | 270
-rw-r--r--  net/ipv6/netfilter/nft_masq_ipv6.c | 14
-rw-r--r--  net/ipv6/netfilter/nft_redir_ipv6.c | 14
-rw-r--r--  net/ipv6/netfilter/nft_reject_ipv6.c | 6
-rw-r--r--  net/ipv6/ping.c | 1
-rw-r--r--  net/ipv6/raw.c | 14
-rw-r--r--  net/ipv6/reassembly.c | 7
-rw-r--r--  net/ipv6/route.c | 50
-rw-r--r--  net/ipv6/seg6.c | 497
-rw-r--r--  net/ipv6/seg6_hmac.c | 484
-rw-r--r--  net/ipv6/seg6_iptunnel.c | 436
-rw-r--r--  net/ipv6/sit.c | 18
-rw-r--r--  net/ipv6/syncookies.c | 2
-rw-r--r--  net/ipv6/tcp_ipv6.c | 18
-rw-r--r--  net/ipv6/udp.c | 50
-rw-r--r--  net/ipv6/udplite.c | 3
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 2
-rw-r--r--  net/ipx/af_ipx.c | 4
-rw-r--r--  net/irda/af_irda.c | 2
-rw-r--r--  net/irda/ircomm/ircomm_tty.c | 2
-rw-r--r--  net/irda/ircomm/ircomm_tty_ioctl.c | 2
-rw-r--r--  net/irda/irda_device.c | 2
-rw-r--r--  net/irda/irlan/irlan_eth.c | 4
-rw-r--r--  net/irda/irnet/irnet.h | 3
-rw-r--r--  net/irda/irnet/irnet_ppp.h | 11
-rw-r--r--  net/irda/irnetlink.c | 22
-rw-r--r--  net/irda/irproc.c | 1
-rw-r--r--  net/iucv/af_iucv.c | 59
-rw-r--r--  net/iucv/iucv.c | 124
-rw-r--r--  net/key/af_key.c | 2
-rw-r--r--  net/l2tp/l2tp_core.c | 2
-rw-r--r--  net/l2tp/l2tp_core.h | 10
-rw-r--r--  net/l2tp/l2tp_eth.c | 2
-rw-r--r--  net/l2tp/l2tp_ip.c | 19
-rw-r--r--  net/l2tp/l2tp_ip6.c | 25
-rw-r--r--  net/l2tp/l2tp_netlink.c | 59
-rw-r--r--  net/l2tp/l2tp_ppp.c | 60
-rw-r--r--  net/lapb/lapb_iface.c | 2
-rw-r--r--  net/lapb/lapb_in.c | 2
-rw-r--r--  net/lapb/lapb_out.c | 2
-rw-r--r--  net/lapb/lapb_subr.c | 2
-rw-r--r--  net/lapb/lapb_timer.c | 2
-rw-r--r--  net/llc/af_llc.c | 24
-rw-r--r--  net/mac80211/Makefile | 3
-rw-r--r--  net/mac80211/aes_cmac.c | 8
-rw-r--r--  net/mac80211/aes_cmac.h | 4
-rw-r--r--  net/mac80211/agg-rx.c | 8
-rw-r--r--  net/mac80211/cfg.c | 35
-rw-r--r--  net/mac80211/chan.c | 3
-rw-r--r--  net/mac80211/debugfs.c | 1
-rw-r--r--  net/mac80211/debugfs_netdev.c | 11
-rw-r--r--  net/mac80211/debugfs_sta.c | 9
-rw-r--r--  net/mac80211/fils_aead.c | 342
-rw-r--r--  net/mac80211/fils_aead.h | 19
-rw-r--r--  net/mac80211/ieee80211_i.h | 30
-rw-r--r--  net/mac80211/iface.c | 52
-rw-r--r--  net/mac80211/key.c | 3
-rw-r--r--  net/mac80211/main.c | 18
-rw-r--r--  net/mac80211/mlme.c | 81
-rw-r--r--  net/mac80211/rx.c | 49
-rw-r--r--  net/mac80211/sta_info.c | 37
-rw-r--r--  net/mac80211/sta_info.h | 4
-rw-r--r--  net/mac80211/tx.c | 75
-rw-r--r--  net/mac80211/util.c | 61
-rw-r--r--  net/mac80211/vht.c | 4
-rw-r--r--  net/mac80211/wme.c | 23
-rw-r--r--  net/mac80211/wpa.c | 2
-rw-r--r--  net/mac802154/Makefile | 2
-rw-r--r--  net/mac802154/util.c | 4
-rw-r--r--  net/mpls/af_mpls.c | 48
-rw-r--r--  net/mpls/mpls_iptunnel.c | 6
-rw-r--r--  net/netfilter/Kconfig | 58
-rw-r--r--  net/netfilter/Makefile | 24
-rw-r--r--  net/netfilter/core.c | 100
-rw-r--r--  net/netfilter/ipset/Kconfig | 9
-rw-r--r--  net/netfilter/ipset/Makefile | 1
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_gen.h | 31
-rw-r--r--  net/netfilter/ipset/ip_set_core.c | 22
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h | 254
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ip.c | 10
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipmac.c | 315
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipmark.c | 10
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipport.c | 6
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipportip.c | 6
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipportnet.c | 10
-rw-r--r--  net/netfilter/ipset/ip_set_hash_net.c | 8
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netiface.c | 10
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netnet.c | 8
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netport.c | 10
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netportnet.c | 10
-rw-r--r--  net/netfilter/ipset/ip_set_list_set.c | 37
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 2
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 27
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c | 54
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 65
-rw-r--r--  net/netfilter/nf_conntrack_proto.c | 158
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c | 101
-rw-r--r--  net/netfilter/nf_conntrack_proto_gre.c | 13
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c | 100
-rw-r--r--  net/netfilter/nf_conntrack_proto_udplite.c | 103
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 10
-rw-r--r--  net/netfilter/nf_dup_netdev.c | 35
-rw-r--r--  net/netfilter/nf_internals.h | 5
-rw-r--r--  net/netfilter/nf_log.c | 1
-rw-r--r--  net/netfilter/nf_log_common.c | 28
-rw-r--r--  net/netfilter/nf_log_netdev.c | 81
-rw-r--r--  net/netfilter/nf_nat_core.c | 12
-rw-r--r--  net/netfilter/nf_nat_proto_dccp.c | 36
-rw-r--r--  net/netfilter/nf_nat_proto_sctp.c | 40
-rw-r--r--  net/netfilter/nf_nat_proto_udplite.c | 35
-rw-r--r--  net/netfilter/nf_queue.c | 36
-rw-r--r--  net/netfilter/nf_synproxy_core.c | 2
-rw-r--r--  net/netfilter/nf_tables_api.c | 771
-rw-r--r--  net/netfilter/nf_tables_core.c | 91
-rw-r--r--  net/netfilter/nf_tables_trace.c | 8
-rw-r--r--  net/netfilter/nfnetlink.c | 2
-rw-r--r--  net/netfilter/nfnetlink_log.c | 7
-rw-r--r--  net/netfilter/nfnetlink_queue.c | 10
-rw-r--r--  net/netfilter/nft_bitwise.c | 13
-rw-r--r--  net/netfilter/nft_byteorder.c | 13
-rw-r--r--  net/netfilter/nft_cmp.c | 16
-rw-r--r--  net/netfilter/nft_counter.c | 223
-rw-r--r--  net/netfilter/nft_ct.c | 76
-rw-r--r--  net/netfilter/nft_dynset.c | 16
-rw-r--r--  net/netfilter/nft_fib.c | 159
-rw-r--r--  net/netfilter/nft_fib_inet.c | 82
-rw-r--r--  net/netfilter/nft_fwd_netdev.c | 4
-rw-r--r--  net/netfilter/nft_hash.c | 6
-rw-r--r--  net/netfilter/nft_immediate.c | 16
-rw-r--r--  net/netfilter/nft_log.c | 8
-rw-r--r--  net/netfilter/nft_lookup.c | 21
-rw-r--r--  net/netfilter/nft_masq.c | 6
-rw-r--r--  net/netfilter/nft_meta.c | 11
-rw-r--r--  net/netfilter/nft_nat.c | 11
-rw-r--r--  net/netfilter/nft_numgen.c | 2
-rw-r--r--  net/netfilter/nft_objref.c | 228
-rw-r--r--  net/netfilter/nft_payload.c | 145
-rw-r--r--  net/netfilter/nft_queue.c | 4
-rw-r--r--  net/netfilter/nft_quota.c | 160
-rw-r--r--  net/netfilter/nft_range.c | 13
-rw-r--r--  net/netfilter/nft_redir.c | 6
-rw-r--r--  net/netfilter/nft_reject_inet.c | 18
-rw-r--r--  net/netfilter/nft_rt.c | 153
-rw-r--r--  net/netfilter/nft_set_hash.c | 27
-rw-r--r--  net/netfilter/nft_set_rbtree.c | 14
-rw-r--r--  net/netfilter/x_tables.c | 60
-rw-r--r--  net/netfilter/xt_AUDIT.c | 10
-rw-r--r--  net/netfilter/xt_CONNSECMARK.c | 4
-rw-r--r--  net/netfilter/xt_CT.c | 6
-rw-r--r--  net/netfilter/xt_LOG.c | 6
-rw-r--r--  net/netfilter/xt_NETMAP.c | 31
-rw-r--r--  net/netfilter/xt_NFLOG.c | 6
-rw-r--r--  net/netfilter/xt_NFQUEUE.c | 4
-rw-r--r--  net/netfilter/xt_RATEEST.c | 4
-rw-r--r--  net/netfilter/xt_REDIRECT.c | 16
-rw-r--r--  net/netfilter/xt_TCPMSS.c | 4
-rw-r--r--  net/netfilter/xt_TEE.c | 4
-rw-r--r--  net/netfilter/xt_TPROXY.c | 31
-rw-r--r--  net/netfilter/xt_addrtype.c | 10
-rw-r--r--  net/netfilter/xt_bpf.c | 96
-rw-r--r--  net/netfilter/xt_cluster.c | 2
-rw-r--r--  net/netfilter/xt_connbytes.c | 4
-rw-r--r--  net/netfilter/xt_connlabel.c | 6
-rw-r--r--  net/netfilter/xt_connlimit.c | 14
-rw-r--r--  net/netfilter/xt_connmark.c | 8
-rw-r--r--  net/netfilter/xt_conntrack.c | 12
-rw-r--r--  net/netfilter/xt_devgroup.c | 4
-rw-r--r--  net/netfilter/xt_dscp.c | 2
-rw-r--r--  net/netfilter/xt_hashlimit.c | 2
-rw-r--r--  net/netfilter/xt_helper.c | 4
-rw-r--r--  net/netfilter/xt_ipvs.c | 4
-rw-r--r--  net/netfilter/xt_multiport.c | 52
-rw-r--r--  net/netfilter/xt_nat.c | 18
-rw-r--r--  net/netfilter/xt_nfacct.c | 2
-rw-r--r--  net/netfilter/xt_osf.c | 10
-rw-r--r--  net/netfilter/xt_owner.c | 2
-rw-r--r--  net/netfilter/xt_pkttype.c | 4
-rw-r--r--  net/netfilter/xt_policy.c | 4
-rw-r--r--  net/netfilter/xt_rateest.c | 28
-rw-r--r--  net/netfilter/xt_recent.c | 12
-rw-r--r--  net/netfilter/xt_set.c | 38
-rw-r--r--  net/netfilter/xt_socket.c | 336
-rw-r--r--  net/netfilter/xt_state.c | 4
-rw-r--r--  net/netfilter/xt_time.c | 2
-rw-r--r--  net/netlabel/netlabel_calipso.c | 21
-rw-r--r--  net/netlabel/netlabel_cipso_v4.c | 22
-rw-r--r--  net/netlabel/netlabel_kapi.c | 5
-rw-r--r--  net/netlabel/netlabel_mgmt.c | 21
-rw-r--r--  net/netlabel/netlabel_unlabeled.c | 21
-rw-r--r--  net/netlink/af_netlink.c | 10
-rw-r--r--  net/netlink/genetlink.c | 323
-rw-r--r--  net/nfc/netlink.c | 34
-rw-r--r--  net/openvswitch/actions.c | 129
-rw-r--r--  net/openvswitch/conntrack.c | 14
-rw-r--r--  net/openvswitch/datapath.c | 34
-rw-r--r--  net/openvswitch/datapath.h | 2
-rw-r--r--  net/openvswitch/flow.c | 113
-rw-r--r--  net/openvswitch/flow.h | 22
-rw-r--r--  net/openvswitch/flow_netlink.c | 178
-rw-r--r--  net/openvswitch/vport-internal_dev.c | 10
-rw-r--r--  net/openvswitch/vport-netdev.c | 10
-rw-r--r--  net/openvswitch/vport.c | 48
-rw-r--r--  net/openvswitch/vport.h | 3
-rw-r--r--  net/packet/af_packet.c | 76
-rw-r--r--  net/phonet/pep-gprs.c | 12
-rw-r--r--  net/phonet/pep.c | 9
-rw-r--r--  net/phonet/pn_dev.c | 2
-rw-r--r--  net/qrtr/qrtr.c | 4
-rw-r--r--  net/rds/af_rds.c | 4
-rw-r--r--  net/rds/connection.c | 18
-rw-r--r--  net/rds/message.c | 1
-rw-r--r--  net/rds/rdma.c | 2
-rw-r--r--  net/rds/rdma_transport.c | 5
-rw-r--r--  net/rds/rds.h | 12
-rw-r--r--  net/rds/recv.c | 36
-rw-r--r--  net/rds/send.c | 9
-rw-r--r--  net/rds/tcp.c | 22
-rw-r--r--  net/rds/tcp_connect.c | 14
-rw-r--r--  net/rds/tcp_listen.c | 31
-rw-r--r--  net/rds/tcp_send.c | 3
-rw-r--r--  net/rds/threads.c | 3
-rw-r--r--  net/rose/af_rose.c | 2
-rw-r--r--  net/rose/rose_route.c | 2
-rw-r--r--  net/rxrpc/af_rxrpc.c | 11
-rw-r--r--  net/rxrpc/conn_client.c | 4
-rw-r--r--  net/rxrpc/input.c | 7
-rw-r--r--  net/sched/act_api.c | 14
-rw-r--r--  net/sched/act_bpf.c | 19
-rw-r--r--  net/sched/act_connmark.c | 2
-rw-r--r--  net/sched/act_csum.c | 2
-rw-r--r--  net/sched/act_gact.c | 2
-rw-r--r--  net/sched/act_ife.c | 2
-rw-r--r--  net/sched/act_ipt.c | 16
-rw-r--r--  net/sched/act_mirred.c | 85
-rw-r--r--  net/sched/act_nat.c | 2
-rw-r--r--  net/sched/act_pedit.c | 2
-rw-r--r--  net/sched/act_police.c | 23
-rw-r--r--  net/sched/act_simple.c | 2
-rw-r--r--  net/sched/act_skbedit.c | 23
-rw-r--r--  net/sched/act_skbmod.c | 2
-rw-r--r--  net/sched/act_tunnel_key.c | 19
-rw-r--r--  net/sched/act_vlan.c | 2
-rw-r--r--  net/sched/cls_api.c | 28
-rw-r--r--  net/sched/cls_bpf.c | 49
-rw-r--r--  net/sched/cls_flower.c | 307
-rw-r--r--  net/sched/cls_matchall.c | 127
-rw-r--r--  net/sched/em_ipset.c | 17
-rw-r--r--  net/sched/em_meta.c | 9
-rw-r--r--  net/sched/sch_api.c | 13
-rw-r--r--  net/sched/sch_cbq.c | 8
-rw-r--r--  net/sched/sch_drr.c | 6
-rw-r--r--  net/sched/sch_fq.c | 18
-rw-r--r--  net/sched/sch_generic.c | 2
-rw-r--r--  net/sched/sch_hfsc.c | 6
-rw-r--r--  net/sched/sch_htb.c | 6
-rw-r--r--  net/sched/sch_netem.c | 4
-rw-r--r--  net/sched/sch_qfq.c | 8
-rw-r--r--  net/sched/sch_teql.c | 5
-rw-r--r--  net/sctp/associola.c | 12
-rw-r--r--  net/sctp/bind_addr.c | 3
-rw-r--r--  net/sctp/chunk.c | 32
-rw-r--r--  net/sctp/endpointola.c | 5
-rw-r--r--  net/sctp/input.c | 93
-rw-r--r--  net/sctp/ipv6.c | 5
-rw-r--r--  net/sctp/offload.c | 2
-rw-r--r--  net/sctp/output.c | 441
-rw-r--r--  net/sctp/outqueue.c | 6
-rw-r--r--  net/sctp/protocol.c | 40
-rw-r--r--  net/sctp/socket.c | 20
-rw-r--r--  net/sctp/transport.c | 2
-rw-r--r--  net/socket.c | 40
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 9
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_crypto.c | 12
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_mech.c | 3
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c | 4
-rw-r--r--  net/sunrpc/cache.c | 2
-rw-r--r--  net/sunrpc/clnt.c | 7
-rw-r--r--  net/sunrpc/netns.h | 2
-rw-r--r--  net/sunrpc/stats.c | 10
-rw-r--r--  net/sunrpc/sunrpc_syms.c | 3
-rw-r--r--  net/sunrpc/svc.c | 14
-rw-r--r--  net/sunrpc/svc_xprt.c | 10
-rw-r--r--  net/sunrpc/svcauth.c | 3
-rw-r--r--  net/sunrpc/svcsock.c | 26
-rw-r--r--  net/sunrpc/sysctl.c | 2
-rw-r--r--  net/sunrpc/xprt.c | 3
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c | 4
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c | 94
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 36
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 5
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 23
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c | 116
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 94
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 34
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 37
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h | 31
-rw-r--r--  net/sunrpc/xprtsock.c | 4
-rw-r--r--  net/switchdev/switchdev.c | 5
-rw-r--r--  net/tipc/core.c | 2
-rw-r--r--  net/tipc/core.h | 2
-rw-r--r--  net/tipc/discover.c | 4
-rw-r--r--  net/tipc/link.c | 2
-rw-r--r--  net/tipc/msg.c | 20
-rw-r--r--  net/tipc/msg.h | 4
-rw-r--r--  net/tipc/name_distr.c | 2
-rw-r--r--  net/tipc/netlink.c | 27
-rw-r--r--  net/tipc/netlink_compat.c | 25
-rw-r--r--  net/tipc/node.c | 9
-rw-r--r--  net/tipc/server.c | 48
-rw-r--r--  net/tipc/socket.c | 500
-rw-r--r--  net/tipc/subscr.c | 124
-rw-r--r--  net/tipc/subscr.h | 1
-rw-r--r--  net/unix/af_unix.c | 39
-rw-r--r--  net/vmw_vsock/virtio_transport.c | 58
-rw-r--r--  net/vmw_vsock/virtio_transport_common.c | 27
-rw-r--r--  net/vmw_vsock/vmci_transport_notify.c | 30
-rw-r--r--  net/vmw_vsock/vmci_transport_notify_qstate.c | 30
-rw-r--r--  net/wimax/stack.c | 22
-rw-r--r--  net/wireless/Makefile | 2
-rw-r--r--  net/wireless/core.c | 33
-rw-r--r--  net/wireless/core.h | 5
-rw-r--r--  net/wireless/lib80211_crypt_tkip.c | 2
-rw-r--r--  net/wireless/mesh.c | 2
-rw-r--r--  net/wireless/mlme.c | 18
-rw-r--r--  net/wireless/nl80211.c | 582
-rw-r--r--  net/wireless/rdev-ops.h | 24
-rw-r--r--  net/wireless/sme.c | 16
-rw-r--r--  net/wireless/trace.h | 37
-rw-r--r--  net/wireless/util.c | 125
-rw-r--r--  net/x25/af_x25.c | 2
-rw-r--r--  net/x25/sysctl_net_x25.c | 2
-rw-r--r--  net/x25/x25_link.c | 2
-rw-r--r--  net/xfrm/xfrm_policy.c | 1
-rw-r--r--  net/xfrm/xfrm_state.c | 12
-rw-r--r--  net/xfrm/xfrm_user.c | 2
590 files changed, 16393 insertions, 7536 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index 7b9219022418..1bb496ea997e 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -10,7 +10,7 @@
* v 1.0 03/22/99
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 7d3a0af954e8..6356623fc238 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -141,15 +141,6 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
EXPORT_SYMBOL(fddi_type_trans);
-int fddi_change_mtu(struct net_device *dev, int new_mtu)
-{
- if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-EXPORT_SYMBOL(fddi_change_mtu);
-
static const struct header_ops fddi_header_ops = {
.create = fddi_header,
};
@@ -161,6 +152,8 @@ static void fddi_setup(struct net_device *dev)
dev->type = ARPHRD_FDDI;
dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */
dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */
+ dev->min_mtu = FDDI_K_SNAP_HLEN;
+ dev->max_mtu = FDDI_K_SNAP_DLEN;
dev->addr_len = FDDI_K_ALEN;
dev->tx_queue_len = 100; /* Long queues on FDDI */
dev->flags = IFF_BROADCAST | IFF_MULTICAST;
diff --git a/net/802/hippi.c b/net/802/hippi.c
index ade1a52cdcff..4460606e9c36 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -34,7 +34,7 @@
#include <linux/errno.h>
#include <net/arp.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
/*
* Create the HIPPI MAC header for an arbitrary protocol layer
@@ -116,18 +116,6 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
EXPORT_SYMBOL(hippi_type_trans);
-int hippi_change_mtu(struct net_device *dev, int new_mtu)
-{
- /*
- * HIPPI's got these nice large MTUs.
- */
- if ((new_mtu < 68) || (new_mtu > 65280))
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-EXPORT_SYMBOL(hippi_change_mtu);
-
/*
* For HIPPI we will actually use the lower 4 bytes of the hardware
* address as the I-FIELD rather than the actual hardware address.
@@ -174,6 +162,8 @@ static void hippi_setup(struct net_device *dev)
dev->type = ARPHRD_HIPPI;
dev->hard_header_len = HIPPI_HLEN;
dev->mtu = 65280;
+ dev->min_mtu = 68;
+ dev->max_mtu = 65280;
dev->addr_len = HIPPI_ALEN;
dev->tx_queue_len = 25 /* 5 */;
memset(dev->broadcast, 0xFF, HIPPI_ALEN);
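
Both the fddi and hippi hunks above follow the same pattern: the driver-local ndo_change_mtu range check is deleted, and the valid range is instead declared in dev->min_mtu/dev->max_mtu so the core can validate it once, centrally. A minimal sketch of that centralised check, assuming the 4.10-era shape of dev_set_mtu() in net/core/dev.c (notifier calls and other details omitted):

int dev_set_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu == dev->mtu)
		return 0;

	/* drivers now only declare their valid range ... */
	if (new_mtu < dev->min_mtu ||
	    (dev->max_mtu > 0 && new_mtu > dev->max_mtu))
		return -EINVAL;

	/* ... and ndo_change_mtu, where still present, merely applies it */
	if (dev->netdev_ops->ndo_change_mtu)
		return dev->netdev_ops->ndo_change_mtu(dev, new_mtu);

	dev->mtu = new_mtu;
	return 0;
}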
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index f2531ad66b68..467069b73ce1 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -34,7 +34,7 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/if_vlan.h>
#include "vlan.h"
@@ -44,7 +44,7 @@
/* Global VLAN variables */
-int vlan_net_id __read_mostly;
+unsigned int vlan_net_id __read_mostly;
const char vlan_fullname[] = "802.1Q VLAN Support";
const char vlan_version[] = DRV_VERSION;
@@ -515,8 +515,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
return -EFAULT;
/* Null terminate this sucker, just in case. */
- args.device1[23] = 0;
- args.u.device2[23] = 0;
+ args.device1[sizeof(args.device1) - 1] = 0;
+ args.u.device2[sizeof(args.u.device2) - 1] = 0;
rtnl_lock();
@@ -571,8 +571,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
err = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
- if ((args.u.name_type >= 0) &&
- (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
+ if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) {
struct vlan_net *vn;
vn = net_generic(net, vlan_net_id);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index cc1557978066..df8bd65dd370 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -159,7 +159,7 @@ void vlan_netlink_fini(void);
extern struct rtnl_link_ops vlan_link_ops;
-extern int vlan_net_id;
+extern unsigned int vlan_net_id;
struct proc_dir_entry;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index fbfacd51aa34..10da6c588bf8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -826,5 +826,8 @@ void vlan_setup(struct net_device *dev)
dev->destructor = vlan_dev_free;
dev->ethtool_ops = &vlan_ethtool_ops;
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
+
eth_zero_addr(dev->broadcast);
}
diff --git a/net/Kconfig b/net/Kconfig
index 7b6cd340b72b..a29bb4b41c50 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -258,10 +258,6 @@ config XPS
config HWBM
bool
-config SOCK_CGROUP_DATA
- bool
- default n
-
config CGROUP_NET_PRIO
bool "Network priority cgroup"
depends on CGROUPS
@@ -402,6 +398,14 @@ config LWTUNNEL
weight tunnel endpoint. Tunnel encapsulation parameters are stored
with light weight tunnel state associated with fib routes.
+config LWTUNNEL_BPF
+ bool "Execute BPF program as route nexthop action"
+ depends on LWTUNNEL
+ default y if LWTUNNEL=y
+ ---help---
+ Allows running BPF programs as a nexthop action following a route
+ lookup for incoming and outgoing packets.
+
config DST_CACHE
bool
default n
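
The new LWTUNNEL_BPF option gates net/core/lwt_bpf.c (added by this series; 397 lines in the diffstat above), which lets a FIB route carry BPF programs to run at the input, output and xmit hooks. A hedged usage sketch with iproute2, assuming the samples/bpf-style syntax of this era; the object file, section name and device are placeholders for a locally built program:

	ip route add 192.168.253.0/24 \
		encap bpf out obj lwt_prog.o section do_out dev veth0

The 'in' and 'xmit' keywords attach a program at the other two hook points.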
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index aa0047c5c467..fca84e111c89 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -620,14 +620,12 @@ error:
static const struct net_device_ops br2684_netdev_ops = {
.ndo_start_xmit = br2684_start_xmit,
.ndo_set_mac_address = br2684_mac_addr,
- .ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
};
static const struct net_device_ops br2684_netdev_ops_routed = {
.ndo_start_xmit = br2684_start_xmit,
.ndo_set_mac_address = br2684_mac_addr,
- .ndo_change_mtu = eth_change_mtu
};
static void br2684_setup(struct net_device *netdev)
@@ -651,7 +649,9 @@ static void br2684_setup_routed(struct net_device *netdev)
netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */
netdev->netdev_ops = &br2684_netdev_ops_routed;
netdev->addr_len = 0;
- netdev->mtu = 1500;
+ netdev->mtu = ETH_DATA_LEN;
+ netdev->min_mtu = 0;
+ netdev->max_mtu = ETH_MAX_MTU;
netdev->type = ARPHRD_PPP;
netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
netdev->tx_queue_len = 100;
diff --git a/net/atm/common.c b/net/atm/common.c
index 6dc12305799e..a3ca922d307b 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
goto out;
skb->dev = NULL; /* for paths shared with net_device interfaces */
ATM_SKB(skb)->atm_options = vcc->atm_options;
- if (copy_from_iter(skb_put(skb, size), size, &m->msg_iter) != size) {
+ if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
kfree_skb(skb);
error = -EFAULT;
goto out;
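
The vcc_sendmsg() change above is part of a tree-wide conversion: copy_from_iter_full() returns a bool and, unlike a bare copy_from_iter() compared against size, reverts the iterator when the copy comes up short, so the caller never sees a half-advanced msg_iter. The calling pattern reduces to:

	/* sketch: the _full variant rolls msg_iter back on failure,
	 * so a plain error exit is safe with no manual iov_iter_revert()
	 */
	if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
		kfree_skb(skb);
		error = -EFAULT;
		goto out;
	}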
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5d2693826afb..09cfe87f0a44 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -111,9 +111,9 @@ static inline void lec_arp_put(struct lec_arp_table *entry)
}
static struct lane2_ops lane2_ops = {
- lane2_resolve, /* resolve, spec 3.1.3 */
- lane2_associate_req, /* associate_req, spec 3.1.4 */
- NULL /* associate indicator, spec 3.1.5 */
+ .resolve = lane2_resolve, /* spec 3.1.3 */
+ .associate_req = lane2_associate_req, /* spec 3.1.4 */
+ .associate_indicator = NULL /* spec 3.1.5 */
};
static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -544,15 +544,6 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
return 0;
}
-/* shamelessly stolen from drivers/net/net_init.c */
-static int lec_change_mtu(struct net_device *dev, int new_mtu)
-{
- if ((new_mtu < 68) || (new_mtu > 18190))
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-
static void lec_set_multicast_list(struct net_device *dev)
{
/*
@@ -565,7 +556,6 @@ static const struct net_device_ops lec_netdev_ops = {
.ndo_open = lec_open,
.ndo_stop = lec_close,
.ndo_start_xmit = lec_start_xmit,
- .ndo_change_mtu = lec_change_mtu,
.ndo_tx_timeout = lec_tx_timeout,
.ndo_set_rx_mode = lec_set_multicast_list,
};
@@ -742,6 +732,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
if (!dev_lec[i])
return -ENOMEM;
dev_lec[i]->netdev_ops = &lec_netdev_ops;
+ dev_lec[i]->max_mtu = 18190;
snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
if (register_netdev(dev_lec[i])) {
free_netdev(dev_lec[i]);
@@ -1068,7 +1059,9 @@ static void __exit lane_module_cleanup(void)
{
int i;
+#ifdef CONFIG_PROC_FS
remove_proc_entry("lec", atm_proc_root);
+#endif
deregister_atm_ioctl(&lane_ioctl_ops);
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 9e60e74c807d..a89fdebeffda 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -535,33 +535,32 @@ static void eg_destroy_cache(struct mpoa_client *mpc)
static const struct in_cache_ops ingress_ops = {
- in_cache_add_entry, /* add_entry */
- in_cache_get, /* get */
- in_cache_get_with_mask, /* get_with_mask */
- in_cache_get_by_vcc, /* get_by_vcc */
- in_cache_put, /* put */
- in_cache_remove_entry, /* remove_entry */
- cache_hit, /* cache_hit */
- clear_count_and_expired, /* clear_count */
- check_resolving_entries, /* check_resolving */
- refresh_entries, /* refresh */
- in_destroy_cache /* destroy_cache */
+ .add_entry = in_cache_add_entry,
+ .get = in_cache_get,
+ .get_with_mask = in_cache_get_with_mask,
+ .get_by_vcc = in_cache_get_by_vcc,
+ .put = in_cache_put,
+ .remove_entry = in_cache_remove_entry,
+ .cache_hit = cache_hit,
+ .clear_count = clear_count_and_expired,
+ .check_resolving = check_resolving_entries,
+ .refresh = refresh_entries,
+ .destroy_cache = in_destroy_cache
};
static const struct eg_cache_ops egress_ops = {
- eg_cache_add_entry, /* add_entry */
- eg_cache_get_by_cache_id, /* get_by_cache_id */
- eg_cache_get_by_tag, /* get_by_tag */
- eg_cache_get_by_vcc, /* get_by_vcc */
- eg_cache_get_by_src_ip, /* get_by_src_ip */
- eg_cache_put, /* put */
- eg_cache_remove_entry, /* remove_entry */
- update_eg_cache_entry, /* update */
- clear_expired, /* clear_expired */
- eg_destroy_cache /* destroy_cache */
+ .add_entry = eg_cache_add_entry,
+ .get_by_cache_id = eg_cache_get_by_cache_id,
+ .get_by_tag = eg_cache_get_by_tag,
+ .get_by_vcc = eg_cache_get_by_vcc,
+ .get_by_src_ip = eg_cache_get_by_src_ip,
+ .put = eg_cache_put,
+ .remove_entry = eg_cache_remove_entry,
+ .update = update_eg_cache_entry,
+ .clear_expired = clear_expired,
+ .destroy_cache = eg_destroy_cache
};
-
void atm_mpoa_init_cache(struct mpoa_client *mpc)
{
mpc->in_ops = &ingress_ops;
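
The two ops tables above, like lane2_ops in lec.c earlier, move from positional to designated initializers: each callback is bound to its field by name, so the table no longer depends on the declaration order of the struct, and any field left out is zero-initialised. A generic illustration with a hypothetical struct and callbacks (not from this file):

	struct example_ops {
		int  (*open)(void);
		void (*close)(void);
	};

	int ex_open(void);	/* hypothetical callbacks */
	void ex_close(void);

	/* positional: silently rebinds if the struct is ever reordered */
	static const struct example_ops fragile = { ex_open, ex_close };

	/* designated: order-independent and self-documenting */
	static const struct example_ops robust = {
		.open  = ex_open,
		.close = ex_close,
	};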
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 2fdebabbfacd..90fcf5fc2e0a 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -32,7 +32,7 @@
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/termios.h> /* For TIOCINQ/OUTQ */
#include <linux/mm.h>
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
index e7c9b0ea17a1..ac2542b7be88 100644
--- a/net/ax25/ax25_addr.c
+++ b/net/ax25/ax25_addr.c
@@ -21,7 +21,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 3d106767b272..9a3a301e1e2f 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -23,7 +23,7 @@
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
index 9bd31e88aeca..891596e74278 100644
--- a/net/ax25/ax25_ds_in.c
+++ b/net/ax25/ax25_ds_in.c
@@ -22,7 +22,7 @@
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index e05bd57b5afd..28827e81ba2b 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -23,7 +23,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5237dff6941d..5fb2104b7304 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -24,7 +24,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 7f16e8a931b2..8c07c28569e4 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -23,7 +23,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index bb5a0e4e98d9..860752639b1a 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -25,7 +25,7 @@
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 2fa3be965101..183b1c583d56 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -23,7 +23,7 @@
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/termios.h> /* For TIOCINQ/OUTQ */
#include <linux/mm.h>
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 8ddd41baa81c..b11a5f466fcc 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -25,7 +25,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index d39097737e38..e1fda27cb27c 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -31,7 +31,7 @@
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c
index 3fbf8f7b2cf4..8632b86e843e 100644
--- a/net/ax25/ax25_std_in.c
+++ b/net/ax25/ax25_std_in.c
@@ -29,7 +29,7 @@
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c
index 8b66a41e538f..94bd06396a43 100644
--- a/net/ax25/ax25_std_subr.c
+++ b/net/ax25/ax25_std_subr.c
@@ -20,7 +20,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 2c0d6ef66f9d..30bbc675261d 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -24,7 +24,7 @@
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 655a7d4c96e1..038b109b2be7 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -25,7 +25,7 @@
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
@@ -264,7 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
{
ax25_clear_queues(ax25);
- if (!sock_flag(ax25->sk, SOCK_DESTROY))
+ if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
ax25_stop_heartbeat(ax25);
ax25_stop_t1timer(ax25);
ax25_stop_t2timer(ax25);
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index c3cffa79bafb..23a6f38a80bf 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -28,7 +28,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 4ad2fb7bcd35..0403b0def7e6 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -25,7 +25,7 @@
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index f20742cbae6d..b73b96a2854b 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -17,7 +17,7 @@ config BATMAN_ADV
config BATMAN_ADV_BATMAN_V
bool "B.A.T.M.A.N. V protocol (experimental)"
- depends on BATMAN_ADV && CFG80211=y || (CFG80211=m && BATMAN_ADV=m)
+ depends on BATMAN_ADV && !(CFG80211=m && BATMAN_ADV=y)
default n
help
This option enables the B.A.T.M.A.N. V protocol, the successor
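
The rewritten dependency reads as a blacklist rather than a whitelist: instead of enumerating the working combinations, it forbids exactly the one that cannot link, namely a built-in batman-adv calling into a modular cfg80211. In particular, building without CFG80211 at all is now allowed (B.A.T.M.A.N. V then simply has no wifi throughput data). The combinations, as implied by the two expressions:

	# CFG80211   BATMAN_ADV   old expr   new expr
	#    y          y or m       ok         ok
	#    m            m          ok         ok
	#    m            y          no         no   (builtin can't call modular code)
	#    n          y or m       no         ok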
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index e2d18d0b1f06..f00f666e2ccd 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -698,7 +698,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
if (!forw_packet_aggr->skb) {
- batadv_forw_packet_free(forw_packet_aggr);
+ batadv_forw_packet_free(forw_packet_aggr, true);
return;
}
@@ -717,17 +717,10 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
if (direct_link)
forw_packet_aggr->direct_link_flags |= 1;
- /* add new packet to packet list */
- spin_lock_bh(&bat_priv->forw_bat_list_lock);
- hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
- spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
- /* start timer for this packet */
INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
batadv_iv_send_outstanding_bat_ogm_packet);
- queue_delayed_work(batadv_event_workqueue,
- &forw_packet_aggr->delayed_work,
- send_time - jiffies);
+
+ batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time);
}
/* aggregate a new packet into the existing ogm packet */
@@ -1272,7 +1265,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
*/
tq_iface_penalty = BATADV_TQ_MAX_VALUE;
if (if_outgoing && (if_incoming == if_outgoing) &&
- batadv_is_wifi_netdev(if_outgoing->net_dev))
+ batadv_is_wifi_hardif(if_outgoing))
tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
bat_priv);
@@ -1611,7 +1604,7 @@ out:
if (hardif_neigh)
batadv_hardif_neigh_put(hardif_neigh);
- kfree_skb(skb_priv);
+ consume_skb(skb_priv);
}
/**
@@ -1783,17 +1776,17 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
struct delayed_work *delayed_work;
struct batadv_forw_packet *forw_packet;
struct batadv_priv *bat_priv;
+ bool dropped = false;
delayed_work = to_delayed_work(work);
forw_packet = container_of(delayed_work, struct batadv_forw_packet,
delayed_work);
bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
- spin_lock_bh(&bat_priv->forw_bat_list_lock);
- hlist_del(&forw_packet->list);
- spin_unlock_bh(&bat_priv->forw_bat_list_lock);
- if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
+ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) {
+ dropped = true;
goto out;
+ }
batadv_iv_ogm_emit(forw_packet);
@@ -1810,7 +1803,10 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
batadv_iv_ogm_schedule(forw_packet->if_incoming);
out:
- batadv_forw_packet_free(forw_packet);
+ /* do we get something for free()? */
+ if (batadv_forw_packet_steal(forw_packet,
+ &bat_priv->forw_bat_list_lock))
+ batadv_forw_packet_free(forw_packet, dropped);
}
static int batadv_iv_ogm_receive(struct sk_buff *skb,
@@ -1820,17 +1816,18 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
struct batadv_ogm_packet *ogm_packet;
u8 *packet_pos;
int ogm_offset;
- bool ret;
+ bool res;
+ int ret = NET_RX_DROP;
- ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN);
- if (!ret)
- return NET_RX_DROP;
+ res = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN);
+ if (!res)
+ goto free_skb;
/* did we receive a B.A.T.M.A.N. IV OGM packet on an interface
* that does not have B.A.T.M.A.N. IV enabled ?
*/
if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable)
- return NET_RX_DROP;
+ goto free_skb;
batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
@@ -1851,8 +1848,15 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
ogm_packet = (struct batadv_ogm_packet *)packet_pos;
}
- kfree_skb(skb);
- return NET_RX_SUCCESS;
+ ret = NET_RX_SUCCESS;
+
+free_skb:
+ if (ret == NET_RX_SUCCESS)
+ consume_skb(skb);
+ else
+ kfree_skb(skb);
+
+ return ret;
}
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
@@ -2486,7 +2490,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
struct batadv_orig_node *orig_node;
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
orig_node = gw_node->orig_node;
router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
if (!router)
@@ -2674,7 +2678,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
" Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n");
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
/* fails if orig_node has no router */
if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
continue;
@@ -2774,7 +2778,7 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
int idx = 0;
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
if (idx++ < idx_skip)
continue;
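
The forw_packet churn in this file comes from the reworked queueing API in send.c: batadv_forw_packet_ogmv1_queue() now links the packet into forw_bat_list and arms the delayed work under the list lock, and the work callback must "steal" the packet back off the list before freeing it, so that the callback and the shutdown purge race for ownership and exactly one of them frees the packet. A simplified sketch of the steal step (the real batadv_forw_packet_steal() in net/batman-adv/send.c also tags a cleanup list; this only shows the ownership idea):

	static bool forw_packet_steal_sketch(struct batadv_forw_packet *fp,
					     spinlock_t *lock)
	{
		bool owned = false;

		spin_lock_bh(lock);
		if (!hlist_unhashed(&fp->list)) {
			/* still queued: unhook it and claim ownership */
			hlist_del_init(&fp->list);
			owned = true;
		}
		spin_unlock_bh(lock);

		return owned;
	}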
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index e79f6f01182e..2ac612d7bab4 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -750,7 +750,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
u32 max_bw = 0, bw;
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
if (!kref_get_unless_zero(&gw_node->refcount))
continue;
@@ -787,7 +787,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
struct batadv_orig_node *curr_gw_orig,
struct batadv_orig_node *orig_node)
{
- struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL;
+ struct batadv_gw_node *curr_gw, *orig_gw = NULL;
u32 gw_throughput, orig_throughput, threshold;
bool ret = false;
@@ -889,7 +889,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv,
" Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n");
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
/* fails if orig_node has no router */
if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
continue;
@@ -1009,7 +1009,7 @@ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
int idx = 0;
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
if (idx++ < idx_skip)
continue;
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index ee08540ce503..f2fb2f05b6bf 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -75,6 +75,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
{
struct batadv_hard_iface *hard_iface = neigh->if_incoming;
struct ethtool_link_ksettings link_settings;
+ struct net_device *real_netdev;
struct station_info sinfo;
u32 throughput;
int ret;
@@ -89,23 +90,27 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
/* if this is a wireless device, then ask its throughput through
* cfg80211 API
*/
- if (batadv_is_wifi_netdev(hard_iface->net_dev)) {
- if (hard_iface->net_dev->ieee80211_ptr) {
- ret = cfg80211_get_station(hard_iface->net_dev,
- neigh->addr, &sinfo);
- if (ret == -ENOENT) {
- /* Node is not associated anymore! It would be
- * possible to delete this neighbor. For now set
- * the throughput metric to 0.
- */
- return 0;
- }
- if (!ret)
- return sinfo.expected_throughput / 100;
+ if (batadv_is_wifi_hardif(hard_iface)) {
+ if (!batadv_is_cfg80211_hardif(hard_iface))
+ /* unsupported WiFi driver version */
+ goto default_throughput;
+
+ real_netdev = batadv_get_real_netdev(hard_iface->net_dev);
+ if (!real_netdev)
+ goto default_throughput;
+
+ ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
+
+ dev_put(real_netdev);
+ if (ret == -ENOENT) {
+ /* Node is not associated anymore! It would be
+ * possible to delete this neighbor. For now set
+ * the throughput metric to 0.
+ */
+ return 0;
}
-
- /* unsupported WiFi driver version */
- goto default_throughput;
+ if (!ret)
+ return sinfo.expected_throughput / 100;
}
/* if not a wifi interface, check if this device provides data via
@@ -187,7 +192,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
int elp_skb_len;
/* this probing routine is for Wifi neighbours only */
- if (!batadv_is_wifi_netdev(hard_iface->net_dev))
+ if (!batadv_is_wifi_hardif(hard_iface))
return true;
/* probe the neighbor only if no unicast packets have been sent
@@ -352,7 +357,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
/* warn the user (again) if no throughput data is available */
hard_iface->bat_v.flags &= ~BATADV_WARNING_DEFAULT;
- if (batadv_is_wifi_netdev(hard_iface->net_dev))
+ if (batadv_is_wifi_hardif(hard_iface))
hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
INIT_DELAYED_WORK(&hard_iface->bat_v.elp_wq,
@@ -492,20 +497,21 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_elp_packet *elp_packet;
struct batadv_hard_iface *primary_if;
struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
- bool ret;
+ bool res;
+ int ret = NET_RX_DROP;
- ret = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN);
- if (!ret)
- return NET_RX_DROP;
+ res = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN);
+ if (!res)
+ goto free_skb;
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
- return NET_RX_DROP;
+ goto free_skb;
/* did we receive a B.A.T.M.A.N. V ELP packet on an interface
* that does not have B.A.T.M.A.N. V ELP enabled ?
*/
if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
- return NET_RX_DROP;
+ goto free_skb;
elp_packet = (struct batadv_elp_packet *)skb->data;
@@ -516,14 +522,19 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
- goto out;
+ goto free_skb;
batadv_v_elp_neigh_update(bat_priv, ethhdr->h_source, if_incoming,
elp_packet);
-out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- consume_skb(skb);
- return NET_RX_SUCCESS;
+ ret = NET_RX_SUCCESS;
+ batadv_hardif_put(primary_if);
+
+free_skb:
+ if (ret == NET_RX_SUCCESS)
+ consume_skb(skb);
+ else
+ kfree_skb(skb);
+
+ return ret;
}
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1aeeadca620c..38b9aab83fc0 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -140,6 +140,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
unsigned char *ogm_buff, *pkt_buff;
int ogm_buff_len;
u16 tvlv_len = 0;
+ int ret;
bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
@@ -182,6 +183,31 @@ static void batadv_v_ogm_send(struct work_struct *work)
if (!kref_get_unless_zero(&hard_iface->refcount))
continue;
+ ret = batadv_hardif_no_broadcast(hard_iface, NULL, NULL);
+ if (ret) {
+ char *type;
+
+ switch (ret) {
+ case BATADV_HARDIF_BCAST_NORECIPIENT:
+ type = "no neighbor";
+ break;
+ case BATADV_HARDIF_BCAST_DUPFWD:
+ type = "single neighbor is source";
+ break;
+ case BATADV_HARDIF_BCAST_DUPORIG:
+ type = "single neighbor is originator";
+ break;
+ default:
+ type = "unknown";
+ }
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n",
+ hard_iface->net_dev->name, type);
+
+ batadv_hardif_put(hard_iface);
+ continue;
+ }
+
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
ogm_packet->orig, ntohl(ogm_packet->seqno),
@@ -401,7 +427,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing)
{
- struct batadv_orig_ifinfo *orig_ifinfo = NULL;
+ struct batadv_orig_ifinfo *orig_ifinfo;
struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
bool protection_started = false;
int ret = -EINVAL;
@@ -486,7 +512,7 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
struct batadv_hard_iface *if_outgoing)
{
struct batadv_neigh_node *router = NULL;
- struct batadv_orig_node *orig_neigh_node = NULL;
+ struct batadv_orig_node *orig_neigh_node;
struct batadv_neigh_node *orig_neigh_router = NULL;
struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
u32 router_throughput, neigh_throughput;
@@ -651,6 +677,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
struct batadv_hard_iface *hard_iface;
struct batadv_ogm2_packet *ogm_packet;
u32 ogm_throughput, link_throughput, path_throughput;
+ int ret;
ethhdr = eth_hdr(skb);
ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset);
@@ -716,6 +743,35 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
if (!kref_get_unless_zero(&hard_iface->refcount))
continue;
+ ret = batadv_hardif_no_broadcast(hard_iface,
+ ogm_packet->orig,
+ hardif_neigh->orig);
+
+ if (ret) {
+ char *type;
+
+ switch (ret) {
+ case BATADV_HARDIF_BCAST_NORECIPIENT:
+ type = "no neighbor";
+ break;
+ case BATADV_HARDIF_BCAST_DUPFWD:
+ type = "single neighbor is source";
+ break;
+ case BATADV_HARDIF_BCAST_DUPORIG:
+ type = "single neighbor is originator";
+ break;
+ default:
+ type = "unknown";
+ }
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n",
+ ogm_packet->orig, hard_iface->net_dev->name,
+ type);
+
+ batadv_hardif_put(hard_iface);
+ continue;
+ }
+
batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
orig_node, neigh_node,
if_incoming, hard_iface);
@@ -754,18 +810,18 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
* B.A.T.M.A.N. V enabled ?
*/
if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
- return NET_RX_DROP;
+ goto free_skb;
if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
- return NET_RX_DROP;
+ goto free_skb;
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
- return NET_RX_DROP;
+ goto free_skb;
ogm_packet = (struct batadv_ogm2_packet *)skb->data;
if (batadv_is_my_mac(bat_priv, ogm_packet->orig))
- return NET_RX_DROP;
+ goto free_skb;
batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
@@ -786,7 +842,12 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
}
ret = NET_RX_SUCCESS;
- consume_skb(skb);
+
+free_skb:
+ if (ret == NET_RX_SUCCESS)
+ consume_skb(skb);
+ else
+ kfree_skb(skb);
return ret;
}
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index b4ffba7dd583..77925504379d 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -186,7 +186,7 @@ struct batadv_debuginfo batadv_debuginfo_##_name = { \
/* the following attributes are general and therefore they will be directly
* placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs
*/
-static BATADV_DEBUGINFO(routing_algos, S_IRUGO, batadv_algorithms_open);
+static BATADV_DEBUGINFO(routing_algos, 0444, batadv_algorithms_open);
static struct batadv_debuginfo *batadv_general_debuginfos[] = {
&batadv_debuginfo_routing_algos,
@@ -194,26 +194,24 @@ static struct batadv_debuginfo *batadv_general_debuginfos[] = {
};
/* The following attributes are per soft interface */
-static BATADV_DEBUGINFO(neighbors, S_IRUGO, neighbors_open);
-static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open);
-static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open);
-static BATADV_DEBUGINFO(transtable_global, S_IRUGO,
- batadv_transtable_global_open);
+static BATADV_DEBUGINFO(neighbors, 0444, neighbors_open);
+static BATADV_DEBUGINFO(originators, 0444, batadv_originators_open);
+static BATADV_DEBUGINFO(gateways, 0444, batadv_gateways_open);
+static BATADV_DEBUGINFO(transtable_global, 0444, batadv_transtable_global_open);
#ifdef CONFIG_BATMAN_ADV_BLA
-static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open);
-static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO,
+static BATADV_DEBUGINFO(bla_claim_table, 0444, batadv_bla_claim_table_open);
+static BATADV_DEBUGINFO(bla_backbone_table, 0444,
batadv_bla_backbone_table_open);
#endif
#ifdef CONFIG_BATMAN_ADV_DAT
-static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
+static BATADV_DEBUGINFO(dat_cache, 0444, batadv_dat_cache_open);
#endif
-static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
- batadv_transtable_local_open);
+static BATADV_DEBUGINFO(transtable_local, 0444, batadv_transtable_local_open);
#ifdef CONFIG_BATMAN_ADV_NC
-static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
+static BATADV_DEBUGINFO(nc_nodes, 0444, batadv_nc_nodes_open);
#endif
#ifdef CONFIG_BATMAN_ADV_MCAST
-static BATADV_DEBUGINFO(mcast_flags, S_IRUGO, batadv_mcast_flags_open);
+static BATADV_DEBUGINFO(mcast_flags, 0444, batadv_mcast_flags_open);
#endif
static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
@@ -253,7 +251,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \
}, \
}
-static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO,
+static BATADV_HARDIF_DEBUGINFO(originators, 0444,
batadv_originators_hardif_open);
static struct batadv_debuginfo *batadv_hardif_debuginfos[] = {
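
The S_IRUGO conversions here follow the tree-wide checkpatch preference for literal octal modes over the symbolic constants. For reference, the equivalences these hunks rely on:

	/* S_IRUGO = S_IRUSR | S_IRGRP | S_IROTH = 0444 (world-readable)
	 * S_IWUSR = 0200, S_IRUSR = 0400, S_IXUGO = 0111;
	 * e.g. S_IRUGO | S_IWUSR == 0644
	 */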
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index e257efdc5d03..49576c5a3fe3 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -369,12 +369,11 @@ out:
* batadv_dbg_arp - print a debug message containing all the ARP packet details
* @bat_priv: the bat priv with all the soft interface information
* @skb: ARP packet
- * @type: ARP type
* @hdr_size: size of the possible header before the ARP packet
* @msg: message to print together with the debugging information
*/
static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
- u16 type, int hdr_size, char *msg)
+ int hdr_size, char *msg)
{
struct batadv_unicast_4addr_packet *unicast_4addr_packet;
struct batadv_bcast_packet *bcast_pkt;
@@ -441,7 +440,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
#else
static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
- u16 type, int hdr_size, char *msg)
+ int hdr_size, char *msg)
{
}
@@ -950,6 +949,41 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
}
/**
+ * batadv_dat_arp_create_reply - create an ARP Reply
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ip_src: ARP sender IP
+ * @ip_dst: ARP target IP
+ * @hw_src: Ethernet source and ARP sender MAC
+ * @hw_dst: Ethernet destination and ARP target MAC
+ * @vid: VLAN identifier (optional, set to zero otherwise)
+ *
+ * Creates an ARP Reply from the given values, optionally encapsulated in a
+ * VLAN header.
+ *
+ * Return: An skb containing an ARP Reply.
+ */
+static struct sk_buff *
+batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src,
+ __be32 ip_dst, u8 *hw_src, u8 *hw_dst,
+ unsigned short vid)
+{
+ struct sk_buff *skb;
+
+ skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->soft_iface,
+ ip_src, hw_dst, hw_src, hw_dst);
+ if (!skb)
+ return NULL;
+
+ skb_reset_mac_header(skb);
+
+ if (vid & BATADV_VLAN_HAS_TAG)
+ skb = vlan_insert_tag(skb, htons(ETH_P_8021Q),
+ vid & VLAN_VID_MASK);
+
+ return skb;
+}
+
+/**
* batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to
* answer using DAT
* @bat_priv: the bat priv with all the soft interface information
@@ -983,8 +1017,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
if (type != ARPOP_REQUEST)
goto out;
- batadv_dbg_arp(bat_priv, skb, type, hdr_size,
- "Parsing outgoing ARP REQUEST");
+ batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing outgoing ARP REQUEST");
ip_src = batadv_arp_ip_src(skb, hdr_size);
hw_src = batadv_arp_hw_src(skb, hdr_size);
@@ -1007,20 +1040,12 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
goto out;
}
- skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
- bat_priv->soft_iface, ip_dst, hw_src,
- dat_entry->mac_addr, hw_src);
+ skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
+ dat_entry->mac_addr,
+ hw_src, vid);
if (!skb_new)
goto out;
- if (vid & BATADV_VLAN_HAS_TAG) {
- skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
- vid & VLAN_VID_MASK);
- if (!skb_new)
- goto out;
- }
-
- skb_reset_mac_header(skb_new);
skb_new->protocol = eth_type_trans(skb_new,
bat_priv->soft_iface);
bat_priv->stats.rx_packets++;
@@ -1075,8 +1100,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
ip_src = batadv_arp_ip_src(skb, hdr_size);
ip_dst = batadv_arp_ip_dst(skb, hdr_size);
- batadv_dbg_arp(bat_priv, skb, type, hdr_size,
- "Parsing incoming ARP REQUEST");
+ batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing incoming ARP REQUEST");
batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
@@ -1084,25 +1108,11 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
if (!dat_entry)
goto out;
- skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
- bat_priv->soft_iface, ip_dst, hw_src,
- dat_entry->mac_addr, hw_src);
-
+ skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
+ dat_entry->mac_addr, hw_src, vid);
if (!skb_new)
goto out;
- /* the rest of the TX path assumes that the mac_header offset pointing
- * to the inner Ethernet header has been set, therefore reset it now.
- */
- skb_reset_mac_header(skb_new);
-
- if (vid & BATADV_VLAN_HAS_TAG) {
- skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
- vid & VLAN_VID_MASK);
- if (!skb_new)
- goto out;
- }
-
/* To preserve backwards compatibility, the node has to choose the outgoing
* format based on the incoming request packet type. The assumption is
* that a node not using the 4addr packet format doesn't support it.
@@ -1149,8 +1159,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
if (type != ARPOP_REPLY)
return;
- batadv_dbg_arp(bat_priv, skb, type, hdr_size,
- "Parsing outgoing ARP REPLY");
+ batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing outgoing ARP REPLY");
hw_src = batadv_arp_hw_src(skb, hdr_size);
ip_src = batadv_arp_ip_src(skb, hdr_size);
@@ -1195,8 +1204,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
if (type != ARPOP_REPLY)
goto out;
- batadv_dbg_arp(bat_priv, skb, type, hdr_size,
- "Parsing incoming ARP REPLY");
+ batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing incoming ARP REPLY");
hw_src = batadv_arp_hw_src(skb, hdr_size);
ip_src = batadv_arp_ip_src(skb, hdr_size);
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0934730fb7ff..0854ebd8613e 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
#include <linux/if_ether.h>
@@ -42,17 +43,23 @@
/**
* batadv_frag_clear_chain - delete entries in the fragment buffer chain
* @head: head of chain with entries.
+ * @dropped: whether the chain is cleared because all fragments are dropped
*
* Free fragments in the passed hlist. Should be called with appropriate lock.
*/
-static void batadv_frag_clear_chain(struct hlist_head *head)
+static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped)
{
struct batadv_frag_list_entry *entry;
struct hlist_node *node;
hlist_for_each_entry_safe(entry, node, head, list) {
hlist_del(&entry->list);
- kfree_skb(entry->skb);
+
+ if (dropped)
+ kfree_skb(entry->skb);
+ else
+ consume_skb(entry->skb);
+
kfree(entry);
}
}
@@ -73,7 +80,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
spin_lock_bh(&chain->lock);
if (!check_cb || check_cb(chain)) {
- batadv_frag_clear_chain(&chain->head);
+ batadv_frag_clear_chain(&chain->fragment_list, true);
chain->size = 0;
}
@@ -117,8 +124,8 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
if (chain->seqno == seqno)
return false;
- if (!hlist_empty(&chain->head))
- batadv_frag_clear_chain(&chain->head);
+ if (!hlist_empty(&chain->fragment_list))
+ batadv_frag_clear_chain(&chain->fragment_list, true);
chain->size = 0;
chain->seqno = seqno;
@@ -176,7 +183,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
chain = &orig_node->fragments[bucket];
spin_lock_bh(&chain->lock);
if (batadv_frag_init_chain(chain, seqno)) {
- hlist_add_head(&frag_entry_new->list, &chain->head);
+ hlist_add_head(&frag_entry_new->list, &chain->fragment_list);
chain->size = skb->len - hdr_size;
chain->timestamp = jiffies;
chain->total_size = ntohs(frag_packet->total_size);
@@ -185,7 +192,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
}
/* Find the position for the new fragment. */
- hlist_for_each_entry(frag_entry_curr, &chain->head, list) {
+ hlist_for_each_entry(frag_entry_curr, &chain->fragment_list, list) {
/* Drop packet if fragment already exists. */
if (frag_entry_curr->no == frag_entry_new->no)
goto err_unlock;
@@ -220,11 +227,11 @@ out:
* exceeds the maximum size of one merged packet. Don't allow
* packets to have different total_size.
*/
- batadv_frag_clear_chain(&chain->head);
+ batadv_frag_clear_chain(&chain->fragment_list, true);
chain->size = 0;
} else if (ntohs(frag_packet->total_size) == chain->size) {
/* All fragments received. Hand over chain to caller. */
- hlist_move_list(&chain->head, chain_out);
+ hlist_move_list(&chain->fragment_list, chain_out);
chain->size = 0;
}
@@ -252,8 +259,9 @@ batadv_frag_merge_packets(struct hlist_head *chain)
{
struct batadv_frag_packet *packet;
struct batadv_frag_list_entry *entry;
- struct sk_buff *skb_out = NULL;
+ struct sk_buff *skb_out;
int size, hdr_size = sizeof(struct batadv_frag_packet);
+ bool dropped = false;
/* Remove first entry, as this is the destination for the rest of the
* fragments.
@@ -270,6 +278,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
kfree_skb(skb_out);
skb_out = NULL;
+ dropped = true;
goto free;
}
@@ -291,7 +300,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
free:
/* Locking is not needed, because 'chain' is not part of any orig. */
- batadv_frag_clear_chain(chain);
+ batadv_frag_clear_chain(chain, dropped);
return skb_out;
}
@@ -352,7 +361,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_orig_node *orig_node_src)
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_orig_node *orig_node_dst = NULL;
+ struct batadv_orig_node *orig_node_dst;
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_frag_packet *packet;
u16 total_size;
@@ -433,8 +442,7 @@ err:
* @orig_node: final destination of the created fragments
* @neigh_node: next-hop of the created fragments
*
- * Return: the netdev tx status or -1 in case of error.
- * When -1 is returned the skb is not consumed.
+ * Return: the netdev tx status or a negative errno code on failure
*/
int batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
@@ -447,7 +455,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
unsigned int header_size = sizeof(frag_header);
unsigned int max_fragment_size, max_packet_size;
- int ret = -1;
+ int ret;
/* To avoid merge and refragmentation at next-hops we never send
* fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
@@ -457,13 +465,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
/* Don't even try to fragment, if we need more than 16 fragments */
- if (skb->len > max_packet_size)
- goto out;
+ if (skb->len > max_packet_size) {
+ ret = -EAGAIN;
+ goto free_skb;
+ }
bat_priv = orig_node->bat_priv;
primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
+ if (!primary_if) {
+ ret = -EINVAL;
+ goto free_skb;
+ }
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
@@ -488,34 +500,35 @@ int batadv_frag_send_packet(struct sk_buff *skb,
/* Eat and send fragments from the tail of skb */
while (skb->len > max_fragment_size) {
skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
- if (!skb_fragment)
- goto out;
+ if (!skb_fragment) {
+ ret = -ENOMEM;
+ goto put_primary_if;
+ }
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
skb_fragment->len + ETH_HLEN);
ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
if (ret != NET_XMIT_SUCCESS) {
- /* return -1 so that the caller can free the original
- * skb
- */
- ret = -1;
- goto out;
+ ret = NET_XMIT_DROP;
+ goto put_primary_if;
}
frag_header.no++;
/* The initial check in this function should cover this case */
if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
- ret = -1;
- goto out;
+ ret = -EINVAL;
+ goto put_primary_if;
}
}
/* Make room for the fragment header. */
if (batadv_skb_head_push(skb, header_size) < 0 ||
- pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
- goto out;
+ pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) {
+ ret = -ENOMEM;
+ goto put_primary_if;
+ }
memcpy(skb->data, &frag_header, header_size);
@@ -524,10 +537,13 @@ int batadv_frag_send_packet(struct sk_buff *skb,
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
skb->len + ETH_HLEN);
ret = batadv_send_unicast_skb(skb, neigh_node);
+ /* skb was consumed */
+ skb = NULL;
-out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+put_primary_if:
+ batadv_hardif_put(primary_if);
+free_skb:
+ kfree_skb(skb);
return ret;
}
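With the labels above, batadv_frag_send_packet() now consumes the skb on every path: on success it is handed to batadv_send_unicast_skb(), on failure it is freed under the free_skb label. A hedged caller-side sketch of the resulting contract (the wrapper name is illustrative, not part of the kernel API):

/* Illustrative caller: the fragmentation code owns the skb from here
 * on, so the caller only interprets the return value.
 */
static int example_send_fragmented(struct sk_buff *skb,
				   struct batadv_orig_node *orig_node,
				   struct batadv_neigh_node *neigh_node)
{
	int ret;

	ret = batadv_frag_send_packet(skb, orig_node, neigh_node);
	/* no kfree_skb(skb) here: NET_XMIT_DROP and the negative errno
	 * codes (-EAGAIN, -EINVAL, -ENOMEM) all imply the skb was freed
	 */
	return ret;
}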
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 3202fe329e63..b95f619606af 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -47,7 +47,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
static inline bool
batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry)
{
- if (!hlist_empty(&frags_entry->head) &&
+ if (!hlist_empty(&frags_entry->fragment_list) &&
batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT))
return true;
return false;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de055d64debe..52b8bd6ec431 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -348,7 +348,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
spin_lock_bh(&bat_priv->gw.list_lock);
kref_get(&gw_node->refcount);
- hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
+ hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list);
spin_unlock_bh(&bat_priv->gw.list_lock);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -376,7 +376,8 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
struct batadv_gw_node *gw_node_tmp, *gw_node = NULL;
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.list, list) {
+ hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.gateway_list,
+ list) {
if (gw_node_tmp->orig_node != orig_node)
continue;
@@ -475,7 +476,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
spin_lock_bh(&bat_priv->gw.list_lock);
hlist_for_each_entry_safe(gw_node, node_tmp,
- &bat_priv->gw.list, list) {
+ &bat_priv->gw.gateway_list, list) {
hlist_del_init_rcu(&gw_node->list);
batadv_gw_node_put(gw_node);
}
@@ -704,7 +705,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
{
struct batadv_neigh_node *neigh_curr = NULL;
struct batadv_neigh_node *neigh_old = NULL;
- struct batadv_orig_node *orig_dst_node = NULL;
+ struct batadv_orig_node *orig_dst_node;
struct batadv_gw_node *gw_node = NULL;
struct batadv_gw_node *curr_gw = NULL;
struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 08ce36147c4c..61a431a9772b 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -92,8 +92,8 @@ out:
*
* Return: result of rtnl_link_ops->get_link_net or @fallback_net
*/
-static const struct net *batadv_getlink_net(const struct net_device *netdev,
- const struct net *fallback_net)
+static struct net *batadv_getlink_net(const struct net_device *netdev,
+ struct net *fallback_net)
{
if (!netdev->rtnl_link_ops)
return fallback_net;
@@ -116,9 +116,9 @@ static const struct net *batadv_getlink_net(const struct net_device *netdev,
* Return: true if the devices are each others parent, otherwise false
*/
static bool batadv_mutual_parents(const struct net_device *dev1,
- const struct net *net1,
+ struct net *net1,
const struct net_device *dev2,
- const struct net *net2)
+ struct net *net2)
{
int dev1_parent_iflink = dev_get_iflink(dev1);
int dev2_parent_iflink = dev_get_iflink(dev2);
@@ -154,7 +154,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
{
struct net *net = dev_net(net_dev);
struct net_device *parent_dev;
- const struct net *parent_net;
+ struct net *parent_net;
bool ret;
/* check if this is a batman-adv mesh interface */
@@ -202,13 +202,77 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
}
/**
- * batadv_is_wifi_netdev - check if the given net_device struct is a wifi
- * interface
+ * batadv_get_real_netdevice - check if the given netdev struct is a virtual
+ * interface on top of another 'real' interface
+ * @netdev: the device to check
+ *
+ * Callers must hold the rtnl semaphore. You may want batadv_get_real_netdev()
+ * instead of this.
+ *
+ * Return: the 'real' net device, the original net device if it is not a
+ * virtual interface, or NULL in case of an error.
+ */
+static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
+{
+ struct batadv_hard_iface *hard_iface = NULL;
+ struct net_device *real_netdev = NULL;
+ struct net *real_net;
+ struct net *net;
+ int ifindex;
+
+ ASSERT_RTNL();
+
+ if (!netdev)
+ return NULL;
+
+ if (netdev->ifindex == dev_get_iflink(netdev)) {
+ dev_hold(netdev);
+ return netdev;
+ }
+
+ hard_iface = batadv_hardif_get_by_netdev(netdev);
+ if (!hard_iface || !hard_iface->soft_iface)
+ goto out;
+
+ net = dev_net(hard_iface->soft_iface);
+ ifindex = dev_get_iflink(netdev);
+ real_net = batadv_getlink_net(netdev, net);
+ real_netdev = dev_get_by_index(real_net, ifindex);
+
+out:
+ if (hard_iface)
+ batadv_hardif_put(hard_iface);
+ return real_netdev;
+}
+
+/**
+ * batadv_get_real_netdev - check if the given net_device struct is a virtual
+ * interface on top of another 'real' interface
* @net_device: the device to check
*
- * Return: true if the net device is a 802.11 wireless device, false otherwise.
+ * Return: the 'real' net device, the original net device if it is not a
+ * virtual interface, or NULL in case of an error.
*/
-bool batadv_is_wifi_netdev(struct net_device *net_device)
+struct net_device *batadv_get_real_netdev(struct net_device *net_device)
+{
+ struct net_device *real_netdev;
+
+ rtnl_lock();
+ real_netdev = batadv_get_real_netdevice(net_device);
+ rtnl_unlock();
+
+ return real_netdev;
+}
+
+/**
+ * batadv_is_wext_netdev - check if the given net_device struct is a
+ * wext wifi interface
+ * @net_device: the device to check
+ *
+ * Return: true if the net device is a wext wireless device, false
+ * otherwise.
+ */
+static bool batadv_is_wext_netdev(struct net_device *net_device)
{
if (!net_device)
return false;
@@ -221,6 +285,22 @@ bool batadv_is_wifi_netdev(struct net_device *net_device)
return true;
#endif
+ return false;
+}
+
+/**
+ * batadv_is_cfg80211_netdev - check if the given net_device struct is a
+ * cfg80211 wifi interface
+ * @net_device: the device to check
+ *
+ * Return: true if the net device is a cfg80211 wireless device, false
+ * otherwise.
+ */
+static bool batadv_is_cfg80211_netdev(struct net_device *net_device)
+{
+ if (!net_device)
+ return false;
+
/* cfg80211 drivers have to set ieee80211_ptr */
if (net_device->ieee80211_ptr)
return true;
@@ -228,6 +308,125 @@ bool batadv_is_wifi_netdev(struct net_device *net_device)
return false;
}
+/**
+ * batadv_wifi_flags_evaluate - calculate wifi flags for net_device
+ * @net_device: the device to check
+ *
+ * Return: batadv_hard_iface_wifi_flags flags of the device
+ */
+static u32 batadv_wifi_flags_evaluate(struct net_device *net_device)
+{
+ u32 wifi_flags = 0;
+ struct net_device *real_netdev;
+
+ if (batadv_is_wext_netdev(net_device))
+ wifi_flags |= BATADV_HARDIF_WIFI_WEXT_DIRECT;
+
+ if (batadv_is_cfg80211_netdev(net_device))
+ wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;
+
+ real_netdev = batadv_get_real_netdevice(net_device);
+ if (!real_netdev)
+ return wifi_flags;
+
+ if (real_netdev == net_device)
+ goto out;
+
+ if (batadv_is_wext_netdev(real_netdev))
+ wifi_flags |= BATADV_HARDIF_WIFI_WEXT_INDIRECT;
+
+ if (batadv_is_cfg80211_netdev(real_netdev))
+ wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT;
+
+out:
+ dev_put(real_netdev);
+ return wifi_flags;
+}
+
+/**
+ * batadv_is_cfg80211_hardif - check if the given hardif is a cfg80211 wifi
+ * interface
+ * @hard_iface: the device to check
+ *
+ * Return: true if the net device is a cfg80211 wireless device, false
+ * otherwise.
+ */
+bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface)
+{
+ u32 allowed_flags = 0;
+
+ allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;
+ allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT;
+
+ return !!(hard_iface->wifi_flags & allowed_flags);
+}
+
+/**
+ * batadv_is_wifi_hardif - check if the given hardif is a wifi interface
+ * @hard_iface: the device to check
+ *
+ * Return: true if the net device is a 802.11 wireless device, false otherwise.
+ */
+bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface)
+{
+ if (!hard_iface)
+ return false;
+
+ return hard_iface->wifi_flags != 0;
+}
+
+/**
+ * batadv_hardif_no_broadcast - check whether (re)broadcast is necessary
+ * @if_outgoing: the outgoing interface checked and considered for (re)broadcast
+ * @orig_addr: the originator of this packet
+ * @orig_neigh: originator address of the forwarder we just got the packet from
+ * (NULL if we originated)
+ *
+ * Checks whether a packet needs to be (re)broadcast on the given interface.
+ *
+ * Return:
+ * BATADV_HARDIF_BCAST_NORECIPIENT: No neighbor on interface
+ * BATADV_HARDIF_BCAST_DUPFWD: Just one neighbor, but it is the forwarder
+ * BATADV_HARDIF_BCAST_DUPORIG: Just one neighbor, but it is the originator
+ * BATADV_HARDIF_BCAST_OK: Several neighbors, must broadcast
+ */
+int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
+ u8 *orig_addr, u8 *orig_neigh)
+{
+ struct batadv_hardif_neigh_node *hardif_neigh;
+ struct hlist_node *first;
+ int ret = BATADV_HARDIF_BCAST_OK;
+
+ rcu_read_lock();
+
+ /* 0 neighbors -> no (re)broadcast */
+ first = rcu_dereference(hlist_first_rcu(&if_outgoing->neigh_list));
+ if (!first) {
+ ret = BATADV_HARDIF_BCAST_NORECIPIENT;
+ goto out;
+ }
+
+ /* >1 neighbors -> (re)broadcast */
+ if (rcu_dereference(hlist_next_rcu(first)))
+ goto out;
+
+ hardif_neigh = hlist_entry(first, struct batadv_hardif_neigh_node,
+ list);
+
+ /* 1 neighbor, is the originator -> no rebroadcast */
+ if (orig_addr && batadv_compare_eth(hardif_neigh->orig, orig_addr)) {
+ ret = BATADV_HARDIF_BCAST_DUPORIG;
+ /* 1 neighbor, is the one we received from -> no rebroadcast */
+ } else if (orig_neigh &&
+ batadv_compare_eth(hardif_neigh->orig, orig_neigh)) {
+ ret = BATADV_HARDIF_BCAST_DUPFWD;
+ }
+
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
static struct batadv_hard_iface *
batadv_hardif_get_active(const struct net_device *soft_iface)
{
@@ -697,7 +896,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
kref_init(&hard_iface->refcount);
hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
- if (batadv_is_wifi_netdev(net_dev))
+ hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev);
+ if (batadv_is_wifi_hardif(hard_iface))
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
batadv_v_hardif_init(hard_iface);
@@ -806,6 +1006,11 @@ static int batadv_hard_if_event(struct notifier_block *this,
if (hard_iface == primary_if)
batadv_primary_if_update_addr(bat_priv, NULL);
break;
+ case NETDEV_CHANGEUPPER:
+ hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev);
+ if (batadv_is_wifi_hardif(hard_iface))
+ hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
+ break;
default:
break;
}
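batadv_wifi_flags_evaluate() folds the four possible wifi attributes into one bitmask: wext/cfg80211 detected on the device itself (DIRECT) and on the underlying device of a virtual interface (INDIRECT). A self-contained user-space sketch of the same decision table (all names here are illustrative stand-ins, not kernel API):

#include <stdbool.h>
#include <stdio.h>

/* illustrative stand-ins for the BATADV_HARDIF_WIFI_* flags */
enum {
	WIFI_WEXT_DIRECT	= 1 << 0,
	WIFI_CFG80211_DIRECT	= 1 << 1,
	WIFI_WEXT_INDIRECT	= 1 << 2,
	WIFI_CFG80211_INDIRECT	= 1 << 3,
};

struct fake_netdev {
	bool wext;
	bool cfg80211;
	struct fake_netdev *lower;	/* "real" device, NULL if none */
};

static unsigned int wifi_flags_evaluate(const struct fake_netdev *dev)
{
	unsigned int flags = 0;

	if (dev->wext)
		flags |= WIFI_WEXT_DIRECT;
	if (dev->cfg80211)
		flags |= WIFI_CFG80211_DIRECT;

	/* check the underlying device of a virtual interface, if any */
	if (dev->lower && dev->lower != dev) {
		if (dev->lower->wext)
			flags |= WIFI_WEXT_INDIRECT;
		if (dev->lower->cfg80211)
			flags |= WIFI_CFG80211_INDIRECT;
	}
	return flags;
}

int main(void)
{
	struct fake_netdev wifi = { .cfg80211 = true };
	struct fake_netdev vlan = { .lower = &wifi };

	printf("vlan-on-wifi flags: %#x\n", wifi_flags_evaluate(&vlan));
	return 0;
}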
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index a76724d369bf..d6309a423629 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -40,6 +40,20 @@ enum batadv_hard_if_state {
};
/**
+ * enum batadv_hard_if_bcast - broadcast avoidance options
+ * @BATADV_HARDIF_BCAST_OK: Do broadcast on the given hard interface
+ * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no recipient
+ * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it from
+ * @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator
+ */
+enum batadv_hard_if_bcast {
+ BATADV_HARDIF_BCAST_OK = 0,
+ BATADV_HARDIF_BCAST_NORECIPIENT,
+ BATADV_HARDIF_BCAST_DUPFWD,
+ BATADV_HARDIF_BCAST_DUPORIG,
+};
+
+/**
* enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal
* @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
* @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
@@ -51,8 +65,9 @@ enum batadv_hard_if_cleanup {
extern struct notifier_block batadv_hard_if_notifier;
-bool batadv_is_wifi_netdev(struct net_device *net_device);
-bool batadv_is_wifi_iface(int ifindex);
+struct net_device *batadv_get_real_netdev(struct net_device *net_device);
+bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface);
+bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
struct batadv_hard_iface*
batadv_hardif_get_by_netdev(const struct net_device *net_dev);
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
@@ -63,6 +78,8 @@ void batadv_hardif_remove_interfaces(void);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
void batadv_hardif_release(struct kref *ref);
+int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
+ u8 *orig_addr, u8 *orig_neigh);
/**
* batadv_hardif_put - decrement the hard interface refcounter and possibly
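A hedged sketch of how a forwarder might act on the four batadv_hard_if_bcast values (the consumer shown here is illustrative; the intended user is presumably the broadcast scheduling code, and the batman-adv headers are assumed to be included):

/* Illustrative consumer: skip the rebroadcast when it cannot reach
 * anyone new on this interface.
 */
static bool example_should_rebroadcast(struct batadv_hard_iface *hard_iface,
				       u8 *orig_addr, u8 *orig_neigh)
{
	switch (batadv_hardif_no_broadcast(hard_iface, orig_addr,
					   orig_neigh)) {
	case BATADV_HARDIF_BCAST_NORECIPIENT:	/* nobody listening */
	case BATADV_HARDIF_BCAST_DUPFWD:	/* only the forwarder */
	case BATADV_HARDIF_BCAST_DUPORIG:	/* only the originator */
		return false;
	case BATADV_HARDIF_BCAST_OK:
	default:
		return true;
	}
}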
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index cbbf87075f06..557a7044cfbc 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -61,36 +61,6 @@ void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
/* free only the hashtable and the hash itself. */
void batadv_hash_destroy(struct batadv_hashtable *hash);
-/* remove the hash structure. if hashdata_free_cb != NULL, this function will be
- * called to remove the elements inside of the hash. if you don't remove the
- * elements, memory might be leaked.
- */
-static inline void batadv_hash_delete(struct batadv_hashtable *hash,
- batadv_hashdata_free_cb free_cb,
- void *arg)
-{
- struct hlist_head *head;
- struct hlist_node *node, *node_tmp;
- spinlock_t *list_lock; /* spinlock to protect write access */
- u32 i;
-
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
- list_lock = &hash->list_locks[i];
-
- spin_lock_bh(list_lock);
- hlist_for_each_safe(node, node_tmp, head) {
- hlist_del_rcu(node);
-
- if (free_cb)
- free_cb(node, arg);
- }
- spin_unlock_bh(list_lock);
- }
-
- batadv_hash_destroy(hash);
-}
-
/**
* batadv_hash_add - adds data to the hashtable
* @hash: storage hash table
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 378cc1119d66..b310f381ae02 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -38,7 +38,6 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/stat.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/uaccess.h>
@@ -322,8 +321,8 @@ int batadv_socket_setup(struct batadv_priv *bat_priv)
if (!bat_priv->debug_dir)
goto err;
- d = debugfs_create_file(BATADV_ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR,
- bat_priv->debug_dir, bat_priv, &batadv_fops);
+ d = debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir,
+ bat_priv, &batadv_fops);
if (!d)
goto err;
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 56dc532f7a2c..c73c31769aba 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -31,7 +31,6 @@
#include <linux/sched.h> /* for linux/wait.h */
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/stat.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/uaccess.h>
@@ -212,8 +211,7 @@ int batadv_debug_log_setup(struct batadv_priv *bat_priv)
spin_lock_init(&bat_priv->debug_log->lock);
init_waitqueue_head(&bat_priv->debug_log->queue_wait);
- d = debugfs_create_file("log", S_IFREG | S_IRUSR,
- bat_priv->debug_dir, bat_priv,
+ d = debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv,
&batadv_log_fops);
if (!d)
goto err;
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index d2905a855d1b..3284a7b0325d 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -71,12 +71,12 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
__printf(2, 3);
/* possibly ratelimited debug output */
-#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
- do { \
- if (atomic_read(&bat_priv->log_level) & type && \
- (!ratelimited || net_ratelimit())) \
- batadv_debug_log(bat_priv, fmt, ## arg);\
- } \
+#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
+ do { \
+ if (atomic_read(&(bat_priv)->log_level) & (type) && \
+ (!(ratelimited) || net_ratelimit())) \
+ batadv_debug_log(bat_priv, fmt, ## arg); \
+ } \
while (0)
#else /* !CONFIG_BATMAN_ADV_DEBUG */
__printf(4, 5)
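The extra parentheses added around the macro arguments above are not cosmetic: without them an argument that is itself an expression rebinds around the macro's own operators. A standalone demonstration with hypothetical macro names, mirroring the log_level check:

#include <stdio.h>

#define LVL_CHECK_BAD(level, type)  ((level) & type)
#define LVL_CHECK_GOOD(level, type) ((level) & (type))

int main(void)
{
	unsigned int level = 0x0f;

	/* argument is itself an OR of two type bits; the unparenthesized
	 * variant parses as ((level & 0x10) | 0x20) and is wrongly
	 * nonzero, the parenthesized one as (level & 0x30) and is zero
	 */
	printf("bad:  %#x\n", LVL_CHECK_BAD(level, 0x10 | 0x20));
	printf("good: %#x\n", LVL_CHECK_GOOD(level, 0x10 | 0x20));
	return 0;
}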
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 2c017ab47557..d46415edd3be 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -23,6 +23,7 @@
#include <linux/crc32c.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/genetlink.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
@@ -44,6 +45,7 @@
#include <linux/workqueue.h>
#include <net/dsfield.h>
#include <net/rtnetlink.h>
+#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
#include "bat_iv_ogm.h"
@@ -160,7 +162,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
- INIT_HLIST_HEAD(&bat_priv->gw.list);
+ INIT_HLIST_HEAD(&bat_priv->gw.gateway_list);
#ifdef CONFIG_BATMAN_ADV_MCAST
INIT_HLIST_HEAD(&bat_priv->mcast.want_all_unsnoopables_list);
INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv4_list);
@@ -402,6 +404,8 @@ void batadv_skb_set_priority(struct sk_buff *skb, int offset)
static int batadv_recv_unhandled_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
+ kfree_skb(skb);
+
return NET_RX_DROP;
}
@@ -416,7 +420,6 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct batadv_ogm_packet *batadv_ogm_packet;
struct batadv_hard_iface *hard_iface;
u8 idx;
- int ret;
hard_iface = container_of(ptype, struct batadv_hard_iface,
batman_adv_ptype);
@@ -466,14 +469,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
/* reset control block to avoid left overs from previous users */
memset(skb->cb, 0, sizeof(struct batadv_skb_cb));
- /* all receive handlers return whether they received or reused
- * the supplied skb. if not, we have to free the skb.
- */
idx = batadv_ogm_packet->packet_type;
- ret = (*batadv_rx_handler[idx])(skb, hard_iface);
-
- if (ret == NET_RX_DROP)
- kfree_skb(skb);
+ (*batadv_rx_handler[idx])(skb, hard_iface);
batadv_hardif_put(hard_iface);
@@ -653,3 +650,4 @@ MODULE_DESCRIPTION(BATADV_DRIVER_DESC);
MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE);
MODULE_VERSION(BATADV_SOURCE_VERSION);
MODULE_ALIAS_RTNL_LINK("batadv");
+MODULE_ALIAS_GENL_FAMILY(BATADV_NL_NAME);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 09af21e27639..a6cc8040a21d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.4"
+#define BATADV_SOURCE_VERSION "2016.5"
#endif
/* B.A.T.M.A.N. parameters */
@@ -48,6 +48,7 @@
#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
#define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */
#define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */
+#define BATADV_MCAST_WORK_PERIOD 500 /* 0.5 seconds */
#define BATADV_DAT_ENTRY_TIMEOUT (5 * 60000) /* 5 mins in milliseconds */
/* sliding packet range of received originator messages in sequence numbers
* (should be a multiple of our word size)
@@ -185,7 +186,6 @@ enum batadv_uev_type {
#include <linux/bitops.h> /* for packet.h */
#include <linux/compiler.h>
-#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h> /* for packet.h */
#include <linux/if_vlan.h>
@@ -200,8 +200,8 @@ struct packet_type;
struct seq_file;
struct sk_buff;
-#define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \
- (int)(vid & VLAN_VID_MASK) : -1)
+#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \
+ (int)((vid) & VLAN_VID_MASK) : -1)
extern struct list_head batadv_hardif_list;
@@ -284,26 +284,6 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
-/**
- * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
- * @bat_priv: the bat priv with all the soft interface information
- * @idx: index of counter to sum up
- *
- * Return: sum of all cpu-local counters
- */
-static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
-{
- u64 *counters, sum = 0;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- counters = per_cpu_ptr(bat_priv->bat_counters, cpu);
- sum += counters[idx];
- }
-
- return sum;
-}
-
/* Define a macro to reach the control buffer of the skb. The members of the
* control buffer are defined in struct batadv_skb_cb in types.h.
* The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 13661f43386f..090a69fc342e 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -33,6 +33,7 @@
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -48,6 +49,7 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include <net/addrconf.h>
#include <net/if_inet6.h>
#include <net/ip.h>
@@ -60,6 +62,18 @@
#include "translation-table.h"
#include "tvlv.h"
+static void batadv_mcast_mla_update(struct work_struct *work);
+
+/**
+ * batadv_mcast_start_timer - schedule the multicast periodic worker
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_mcast_start_timer(struct batadv_priv *bat_priv)
+{
+ queue_delayed_work(batadv_event_workqueue, &bat_priv->mcast.work,
+ msecs_to_jiffies(BATADV_MCAST_WORK_PERIOD));
+}
+
/**
* batadv_mcast_get_bridge - get the bridge on top of the softif if it exists
* @soft_iface: netdev struct of the mesh interface
@@ -231,19 +245,15 @@ out:
/**
* batadv_mcast_mla_list_free - free a list of multicast addresses
- * @bat_priv: the bat priv with all the soft interface information
* @mcast_list: the list to free
*
* Removes and frees all items in the given mcast_list.
*/
-static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv,
- struct hlist_head *mcast_list)
+static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list)
{
struct batadv_hw_addr *mcast_entry;
struct hlist_node *tmp;
- lockdep_assert_held(&bat_priv->tt.commit_lock);
-
hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) {
hlist_del(&mcast_entry->list);
kfree(mcast_entry);
@@ -259,6 +269,8 @@ static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv,
* translation table except the ones listed in the given mcast_list.
*
* If mcast_list is NULL then all are retracted.
+ *
+ * Do not call outside of the mcast worker! (or cancel mcast worker first)
*/
static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
struct hlist_head *mcast_list)
@@ -266,7 +278,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
struct batadv_hw_addr *mcast_entry;
struct hlist_node *tmp;
- lockdep_assert_held(&bat_priv->tt.commit_lock);
+ WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list,
list) {
@@ -291,6 +303,8 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
*
* Adds multicast listener announcements from the given mcast_list to the
* translation table if they have not been added yet.
+ *
+ * Do not call outside of the mcast worker! (or cancel mcast worker first)
*/
static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
struct hlist_head *mcast_list)
@@ -298,7 +312,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
struct batadv_hw_addr *mcast_entry;
struct hlist_node *tmp;
- lockdep_assert_held(&bat_priv->tt.commit_lock);
+ WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
if (!mcast_list)
return;
@@ -532,13 +546,18 @@ update:
}
/**
- * batadv_mcast_mla_update - update the own MLAs
+ * __batadv_mcast_mla_update - update the own MLAs
* @bat_priv: the bat priv with all the soft interface information
*
* Updates the own multicast listener announcements in the translation
* table as well as the own, announced multicast tvlv container.
+ *
+ * Note that non-conflicting reads and writes to bat_priv->mcast.mla_list
+ * in batadv_mcast_mla_tt_retract() and batadv_mcast_mla_tt_add() are
+ * ensured by the non-parallel execution of the worker this function
+ * belongs to.
*/
-void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
+static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
{
struct net_device *soft_iface = bat_priv->soft_iface;
struct hlist_head mcast_list = HLIST_HEAD_INIT;
@@ -560,7 +579,30 @@ update:
batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
out:
- batadv_mcast_mla_list_free(bat_priv, &mcast_list);
+ batadv_mcast_mla_list_free(&mcast_list);
+}
+
+/**
+ * batadv_mcast_mla_update - update the own MLAs
+ * @work: kernel work struct
+ *
+ * Updates the own multicast listener announcements in the translation
+ * table as well as the own, announced multicast tvlv container.
+ *
+ * In the end, reschedules the work timer.
+ */
+static void batadv_mcast_mla_update(struct work_struct *work)
+{
+ struct delayed_work *delayed_work;
+ struct batadv_priv_mcast *priv_mcast;
+ struct batadv_priv *bat_priv;
+
+ delayed_work = to_delayed_work(work);
+ priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work);
+ bat_priv = container_of(priv_mcast, struct batadv_priv, mcast);
+
+ __batadv_mcast_mla_update(bat_priv);
+ batadv_mcast_start_timer(bat_priv);
}
/**
@@ -1132,6 +1174,9 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
NULL, BATADV_TVLV_MCAST, 2,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+
+ INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
+ batadv_mcast_start_timer(bat_priv);
}
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
@@ -1243,12 +1288,13 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
*/
void batadv_mcast_free(struct batadv_priv *bat_priv)
{
+ cancel_delayed_work_sync(&bat_priv->mcast.work);
+
batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
- spin_lock_bh(&bat_priv->tt.commit_lock);
+ /* safe to call outside of the worker, as it was canceled above */
batadv_mcast_mla_tt_retract(bat_priv, NULL);
- spin_unlock_bh(&bat_priv->tt.commit_lock);
}
/**
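The worker conversion above replaces the tt.commit_lock serialisation with the guarantee that a single delayed work item never runs concurrently with itself, rearming from inside its own handler. A minimal sketch of that self-rescheduling pattern on the generic system workqueue (names are illustrative):

#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define EXAMPLE_PERIOD_MS 500

static struct delayed_work example_work;

static void example_fn(struct work_struct *work)
{
	/* ... periodic update; no lock needed against ourselves, a
	 * delayed work item is never executed concurrently ...
	 */
	queue_delayed_work(system_wq, &example_work,
			   msecs_to_jiffies(EXAMPLE_PERIOD_MS));
}

static void example_start(void)
{
	INIT_DELAYED_WORK(&example_work, example_fn);
	queue_delayed_work(system_wq, &example_work,
			   msecs_to_jiffies(EXAMPLE_PERIOD_MS));
}

static void example_stop(void)
{
	/* cancels a pending run, waits for a running one, and handles
	 * the self-rearming race
	 */
	cancel_delayed_work_sync(&example_work);
}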
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 1fb00ba84907..2cddaf52a21d 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -39,8 +39,6 @@ enum batadv_forw_mode {
#ifdef CONFIG_BATMAN_ADV_MCAST
-void batadv_mcast_mla_update(struct batadv_priv *bat_priv);
-
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_orig_node **mcast_single_orig);
@@ -55,10 +53,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
#else
-static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
-{
-}
-
static inline enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_orig_node **mcast_single_orig)
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 64cb6acbe0a6..062738163bdc 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -20,11 +20,14 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
#include <linux/errno.h>
+#include <linux/export.h>
#include <linux/fs.h>
#include <linux/genetlink.h>
#include <linux/if_ether.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/printk.h>
@@ -48,14 +51,7 @@
#include "tp_meter.h"
#include "translation-table.h"
-struct genl_family batadv_netlink_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = BATADV_NL_NAME,
- .version = 1,
- .maxattr = BATADV_ATTR_MAX,
- .netnsok = true,
-};
+struct genl_family batadv_netlink_family;
/* multicast groups */
enum batadv_netlink_multicast_groups {
@@ -534,7 +530,7 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
return msg->len;
}
-static struct genl_ops batadv_netlink_ops[] = {
+static const struct genl_ops batadv_netlink_ops[] = {
{
.cmd = BATADV_CMD_GET_MESH_INFO,
.flags = GENL_ADMIN_PERM,
@@ -610,6 +606,19 @@ static struct genl_ops batadv_netlink_ops[] = {
};
+struct genl_family batadv_netlink_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = BATADV_NL_NAME,
+ .version = 1,
+ .maxattr = BATADV_ATTR_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = batadv_netlink_ops,
+ .n_ops = ARRAY_SIZE(batadv_netlink_ops),
+ .mcgrps = batadv_netlink_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
+};
+
/**
* batadv_netlink_register - register batadv genl netlink family
*/
@@ -617,9 +626,7 @@ void __init batadv_netlink_register(void)
{
int ret;
- ret = genl_register_family_with_ops_groups(&batadv_netlink_family,
- batadv_netlink_ops,
- batadv_netlink_mcgrps);
+ ret = genl_register_family(&batadv_netlink_family);
if (ret)
pr_warn("unable to register netlink family");
}
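This follows the then-new genetlink convention: the family id is assigned by the core (GENL_ID_GENERATE is gone), and the ops and multicast groups become members of a statically initialised, __ro_after_init family instead of arguments to the register call. A hedged sketch of the same pattern for a hypothetical family:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <net/genetlink.h>

static const struct genl_ops example_ops[] = {
	/* { .cmd = ..., .flags = ..., .doit = ... }, */
};

static struct genl_family example_family __ro_after_init = {
	.name	 = "example",
	.version = 1,
	.module	 = THIS_MODULE,
	.ops	 = example_ops,
	.n_ops	 = ARRAY_SIZE(example_ops),
};

static int __init example_init(void)
{
	/* the core picks the family id at registration time */
	return genl_register_family(&example_family);
}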
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index e3baf697a35c..ab5a3bf0765f 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -44,7 +44,6 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/stat.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
@@ -261,10 +260,16 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
/**
* batadv_nc_packet_free - frees nc packet
* @nc_packet: the nc packet to free
+ * @dropped: whether the packet is freed because it is dropped
*/
-static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
+static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet,
+ bool dropped)
{
- kfree_skb(nc_packet->skb);
+ if (dropped)
+ kfree_skb(nc_packet->skb);
+ else
+ consume_skb(nc_packet->skb);
+
batadv_nc_path_put(nc_packet->nc_path);
kfree(nc_packet);
}
@@ -577,7 +582,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
{
batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node);
nc_packet->skb = NULL;
- batadv_nc_packet_free(nc_packet);
+ batadv_nc_packet_free(nc_packet, false);
}
/**
@@ -611,7 +616,7 @@ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
/* purge nc packet */
list_del(&nc_packet->list);
- batadv_nc_packet_free(nc_packet);
+ batadv_nc_packet_free(nc_packet, true);
res = true;
@@ -1209,11 +1214,11 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
}
/* skb_src is now coded into skb_dest, so free it */
- kfree_skb(skb_src);
+ consume_skb(skb_src);
/* avoid duplicate free of skb from nc_packet */
nc_packet->skb = NULL;
- batadv_nc_packet_free(nc_packet);
+ batadv_nc_packet_free(nc_packet, false);
/* Send the coded packet and return true */
batadv_send_unicast_skb(skb_dest, first_dest);
@@ -1400,7 +1405,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
/* batadv_nc_skb_store_for_decoding() clones the skb, so we must free
* our ref
*/
- kfree_skb(skb);
+ consume_skb(skb);
}
/**
@@ -1724,7 +1729,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
ether_addr_copy(unicast_packet->dest, orig_dest);
unicast_packet->ttvn = ttvn;
- batadv_nc_packet_free(nc_packet);
+ batadv_nc_packet_free(nc_packet, false);
return unicast_packet;
}
@@ -1814,11 +1819,11 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
/* Check if network coding is enabled */
if (!atomic_read(&bat_priv->network_coding))
- return NET_RX_DROP;
+ goto free_skb;
/* Make sure we can access (and remove) header */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
- return NET_RX_DROP;
+ goto free_skb;
coded_packet = (struct batadv_coded_packet *)skb->data;
ethhdr = eth_hdr(skb);
@@ -1826,7 +1831,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
/* Verify frame is destined for us */
if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
!batadv_is_my_mac(bat_priv, coded_packet->second_dest))
- return NET_RX_DROP;
+ goto free_skb;
/* Update stat counter */
if (batadv_is_my_mac(bat_priv, coded_packet->second_dest))
@@ -1836,7 +1841,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
coded_packet);
if (!nc_packet) {
batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
- return NET_RX_DROP;
+ goto free_skb;
}
/* Make skb's linear, because decoding accesses the entire buffer */
@@ -1861,7 +1866,10 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
return batadv_recv_unicast_packet(skb, recv_if);
free_nc_packet:
- batadv_nc_packet_free(nc_packet);
+ batadv_nc_packet_free(nc_packet, true);
+free_skb:
+ kfree_skb(skb);
+
return NET_RX_DROP;
}
@@ -1961,17 +1969,16 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
if (!nc_dir)
goto out;
- file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir,
- &bat_priv->nc.min_tq);
+ file = debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq);
if (!file)
goto out;
- file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir,
+ file = debugfs_create_u32("max_fwd_delay", 0644, nc_dir,
&bat_priv->nc.max_fwd_delay);
if (!file)
goto out;
- file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir,
+ file = debugfs_create_u32("max_buffer_time", 0644, nc_dir,
&bat_priv->nc.max_buffer_time);
if (!file)
goto out;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 7c8d16086f0f..8f3b2969cc4e 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -364,7 +364,7 @@ struct batadv_orig_ifinfo *
batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *if_outgoing)
{
- struct batadv_orig_ifinfo *orig_ifinfo = NULL;
+ struct batadv_orig_ifinfo *orig_ifinfo;
unsigned long reset_time;
spin_lock_bh(&orig_node->neigh_list_lock);
@@ -512,15 +512,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
* batadv_hardif_neigh_create - create a hardif neighbour node
* @hard_iface: the interface this neighbour is connected to
* @neigh_addr: the interface address of the neighbour to retrieve
+ * @orig_node: originator object representing the neighbour
*
* Return: the hardif neighbour node if found or created or NULL otherwise.
*/
static struct batadv_hardif_neigh_node *
batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
- const u8 *neigh_addr)
+ const u8 *neigh_addr,
+ struct batadv_orig_node *orig_node)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_hardif_neigh_node *hardif_neigh = NULL;
+ struct batadv_hardif_neigh_node *hardif_neigh;
spin_lock_bh(&hard_iface->neigh_list_lock);
@@ -536,6 +538,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
kref_get(&hard_iface->refcount);
INIT_HLIST_NODE(&hardif_neigh->list);
ether_addr_copy(hardif_neigh->addr, neigh_addr);
+ ether_addr_copy(hardif_neigh->orig, orig_node->orig);
hardif_neigh->if_incoming = hard_iface;
hardif_neigh->last_seen = jiffies;
@@ -556,21 +559,23 @@ out:
* node
* @hard_iface: the interface this neighbour is connected to
* @neigh_addr: the interface address of the neighbour to retrieve
+ * @orig_node: originator object representing the neighbour
*
* Return: the hardif neighbour node if found or created or NULL otherwise.
*/
static struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
- const u8 *neigh_addr)
+ const u8 *neigh_addr,
+ struct batadv_orig_node *orig_node)
{
- struct batadv_hardif_neigh_node *hardif_neigh = NULL;
+ struct batadv_hardif_neigh_node *hardif_neigh;
/* first check without locking to avoid the overhead */
hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr);
if (hardif_neigh)
return hardif_neigh;
- return batadv_hardif_neigh_create(hard_iface, neigh_addr);
+ return batadv_hardif_neigh_create(hard_iface, neigh_addr, orig_node);
}
/**
@@ -630,7 +635,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node,
goto out;
hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface,
- neigh_addr);
+ neigh_addr, orig_node);
if (!hardif_neigh)
goto out;
@@ -683,7 +688,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr)
{
- struct batadv_neigh_node *neigh_node = NULL;
+ struct batadv_neigh_node *neigh_node;
/* first check without locking to avoid the overhead */
neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
@@ -1021,7 +1026,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
batadv_orig_node_vlan_put(vlan);
for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
- INIT_HLIST_HEAD(&orig_node->fragments[i].head);
+ INIT_HLIST_HEAD(&orig_node->fragments[i].fragment_list);
spin_lock_init(&orig_node->fragments[i].lock);
orig_node->fragments[i].size = 0;
}
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 6afc0b86950e..7a36bcfa0ba0 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -21,7 +21,7 @@
#include <asm/byteorder.h>
#include <linux/types.h>
-#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0)
+#define batadv_tp_is_error(n) ((u8)(n) > 127 ? 1 : 0)
/**
* enum batadv_packettype - types for batman-adv encapsulated packets
@@ -252,16 +252,6 @@ struct batadv_elp_packet {
#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
/**
- * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes
- * @BATADV_TP_START: start a throughput meter run
- * @BATADV_TP_STOP: stop a throughput meter run
- */
-enum batadv_icmp_user_cmd_type {
- BATADV_TP_START = 0,
- BATADV_TP_STOP = 2,
-};
-
-/**
* struct batadv_icmp_header - common members among all the ICMP packets
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the general header
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7e8dc648b95a..6713bdf414cd 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -196,8 +196,8 @@ bool batadv_check_management_packet(struct sk_buff *skb,
if (!is_broadcast_ether_addr(ethhdr->h_dest))
return false;
- /* packet with broadcast sender address */
- if (is_broadcast_ether_addr(ethhdr->h_source))
+ /* packet with invalid sender address */
+ if (!is_valid_ether_addr(ethhdr->h_source))
return false;
/* create a copy of the skb, if needed, to modify it. */
@@ -262,11 +262,11 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
icmph->ttl = BATADV_TTL;
res = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (res == -1)
- goto out;
-
- ret = NET_RX_SUCCESS;
+ if (res == NET_XMIT_SUCCESS)
+ ret = NET_RX_SUCCESS;
+ /* skb was consumed */
+ skb = NULL;
break;
case BATADV_TP:
if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet)))
@@ -274,6 +274,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
batadv_tp_meter_recv(bat_priv, skb);
ret = NET_RX_SUCCESS;
+ /* skb was consumed */
+ skb = NULL;
goto out;
default:
/* drop unknown type */
@@ -284,6 +286,9 @@ out:
batadv_hardif_put(primary_if);
if (orig_node)
batadv_orig_node_put(orig_node);
+
+ kfree_skb(skb);
+
return ret;
}
@@ -325,14 +330,20 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
icmp_packet->ttl = BATADV_TTL;
res = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (res != -1)
- ret = NET_RX_SUCCESS;
+ if (res == NET_XMIT_SUCCESS)
+ ret = NET_RX_SUCCESS;
+
+ /* skb was consumed */
+ skb = NULL;
out:
if (primary_if)
batadv_hardif_put(primary_if);
if (orig_node)
batadv_orig_node_put(orig_node);
+
+ kfree_skb(skb);
+
return ret;
}
@@ -349,21 +360,21 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
/* drop packet if it does not have the necessary minimum size */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
- goto out;
+ goto free_skb;
ethhdr = eth_hdr(skb);
- /* packet with unicast indication but broadcast recipient */
- if (is_broadcast_ether_addr(ethhdr->h_dest))
- goto out;
+ /* packet with unicast indication but non-unicast recipient */
+ if (!is_valid_ether_addr(ethhdr->h_dest))
+ goto free_skb;
- /* packet with broadcast sender address */
- if (is_broadcast_ether_addr(ethhdr->h_source))
- goto out;
+ /* packet with broadcast/multicast sender address */
+ if (is_multicast_ether_addr(ethhdr->h_source))
+ goto free_skb;
/* not for me */
if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
- goto out;
+ goto free_skb;
icmph = (struct batadv_icmp_header *)skb->data;
@@ -372,17 +383,17 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
icmph->msg_type == BATADV_ECHO_REQUEST) &&
(skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
if (skb_linearize(skb) < 0)
- goto out;
+ goto free_skb;
/* create a copy of the skb, if needed, to modify it. */
if (skb_cow(skb, ETH_HLEN) < 0)
- goto out;
+ goto free_skb;
ethhdr = eth_hdr(skb);
icmph = (struct batadv_icmp_header *)skb->data;
icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph;
if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
- goto out;
+ goto free_skb;
ether_addr_copy(icmp_packet_rr->rr[icmp_packet_rr->rr_cur],
ethhdr->h_dest);
@@ -400,11 +411,11 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
/* get routing information */
orig_node = batadv_orig_hash_find(bat_priv, icmph->dst);
if (!orig_node)
- goto out;
+ goto free_skb;
/* create a copy of the skb, if needed, to modify it. */
if (skb_cow(skb, ETH_HLEN) < 0)
- goto out;
+ goto put_orig_node;
icmph = (struct batadv_icmp_header *)skb->data;
@@ -413,12 +424,18 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
/* route it */
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
- if (res != -1)
+ if (res == NET_XMIT_SUCCESS)
ret = NET_RX_SUCCESS;
-out:
+ /* skb was consumed */
+ skb = NULL;
+
+put_orig_node:
if (orig_node)
batadv_orig_node_put(orig_node);
+free_skb:
+ kfree_skb(skb);
+
return ret;
}
@@ -445,12 +462,12 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
ethhdr = eth_hdr(skb);
- /* packet with unicast indication but broadcast recipient */
- if (is_broadcast_ether_addr(ethhdr->h_dest))
+ /* packet with unicast indication but non-unicast recipient */
+ if (!is_valid_ether_addr(ethhdr->h_dest))
return -EBADR;
- /* packet with broadcast sender address */
- if (is_broadcast_ether_addr(ethhdr->h_source))
+ /* packet with broadcast/multicast sender address */
+ if (is_multicast_ether_addr(ethhdr->h_source))
return -EBADR;
/* not for me */
@@ -667,18 +684,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
if (unicast_packet->ttl < 2) {
pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n",
ethhdr->h_source, unicast_packet->dest);
- goto out;
+ goto free_skb;
}
/* get routing information */
orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest);
if (!orig_node)
- goto out;
+ goto free_skb;
/* create a copy of the skb, if needed, to modify it. */
if (skb_cow(skb, ETH_HLEN) < 0)
- goto out;
+ goto put_orig_node;
/* decrement ttl */
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -702,8 +719,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
len = skb->len;
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
- if (res == -1)
- goto out;
+ if (res == NET_XMIT_SUCCESS)
+ ret = NET_RX_SUCCESS;
+
+ /* skb was consumed */
+ skb = NULL;
/* translate transmit result into receive result */
if (res == NET_XMIT_SUCCESS) {
@@ -713,11 +733,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
len + ETH_HLEN);
}
- ret = NET_RX_SUCCESS;
+put_orig_node:
+ batadv_orig_node_put(orig_node);
+free_skb:
+ kfree_skb(skb);
-out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
return ret;
}
@@ -902,14 +922,18 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);
if (check < 0)
- return NET_RX_DROP;
+ goto free_skb;
/* we don't know about this type, drop it. */
unicast_packet = (struct batadv_unicast_packet *)skb->data;
if (batadv_is_my_mac(bat_priv, unicast_packet->dest))
- return NET_RX_DROP;
+ goto free_skb;
return batadv_route_unicast_packet(skb, recv_if);
+
+free_skb:
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
int batadv_recv_unicast_packet(struct sk_buff *skb,
@@ -923,6 +947,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
int check, hdr_size = sizeof(*unicast_packet);
enum batadv_subtype subtype;
bool is4addr;
+ int ret = NET_RX_DROP;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
@@ -942,9 +967,9 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
batadv_nc_skb_store_sniffed_unicast(bat_priv, skb);
if (check < 0)
- return NET_RX_DROP;
+ goto free_skb;
if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
- return NET_RX_DROP;
+ goto free_skb;
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
@@ -982,7 +1007,14 @@ rx_success:
return NET_RX_SUCCESS;
}
- return batadv_route_unicast_packet(skb, recv_if);
+ ret = batadv_route_unicast_packet(skb, recv_if);
+ /* skb was consumed */
+ skb = NULL;
+
+free_skb:
+ kfree_skb(skb);
+
+ return ret;
}
/**
@@ -1004,15 +1036,15 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
int ret = NET_RX_DROP;
if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
- return NET_RX_DROP;
+ goto free_skb;
/* the header is likely to be modified while forwarding */
if (skb_cow(skb, hdr_size) < 0)
- return NET_RX_DROP;
+ goto free_skb;
/* packet needs to be linearized to access the tvlv content */
if (skb_linearize(skb) < 0)
- return NET_RX_DROP;
+ goto free_skb;
unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)skb->data;
@@ -1020,17 +1052,21 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
tvlv_buff_len = ntohs(unicast_tvlv_packet->tvlv_len);
if (tvlv_buff_len > skb->len - hdr_size)
- return NET_RX_DROP;
+ goto free_skb;
ret = batadv_tvlv_containers_process(bat_priv, false, NULL,
unicast_tvlv_packet->src,
unicast_tvlv_packet->dst,
tvlv_buff, tvlv_buff_len);
- if (ret != NET_RX_SUCCESS)
+ if (ret != NET_RX_SUCCESS) {
ret = batadv_route_unicast_packet(skb, recv_if);
- else
- consume_skb(skb);
+ /* skb was consumed */
+ skb = NULL;
+ }
+
+free_skb:
+ kfree_skb(skb);
return ret;
}
@@ -1056,20 +1092,22 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
if (batadv_check_unicast_packet(bat_priv, skb,
sizeof(*frag_packet)) < 0)
- goto out;
+ goto free_skb;
frag_packet = (struct batadv_frag_packet *)skb->data;
orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig);
if (!orig_node_src)
- goto out;
+ goto free_skb;
skb->priority = frag_packet->priority + 256;
/* Route the fragment if it is not for us and too big to be merged. */
if (!batadv_is_my_mac(bat_priv, frag_packet->dest) &&
batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) {
+ /* skb was consumed */
+ skb = NULL;
ret = NET_RX_SUCCESS;
- goto out;
+ goto put_orig_node;
}
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX);
@@ -1077,20 +1115,24 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
/* Add fragment to buffer and merge if possible. */
if (!batadv_frag_skb_buffer(&skb, orig_node_src))
- goto out;
+ goto put_orig_node;
/* Deliver merged packet to the appropriate handler, if it was
* merged
*/
- if (skb)
+ if (skb) {
batadv_batman_skb_recv(skb, recv_if->net_dev,
&recv_if->batman_adv_ptype, NULL);
+ /* skb was consumed */
+ skb = NULL;
+ }
ret = NET_RX_SUCCESS;
-out:
- if (orig_node_src)
- batadv_orig_node_put(orig_node_src);
+put_orig_node:
+ batadv_orig_node_put(orig_node_src);
+free_skb:
+ kfree_skb(skb);
return ret;
}
@@ -1109,35 +1151,35 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
/* drop packet if it has not necessary minimum size */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
- goto out;
+ goto free_skb;
ethhdr = eth_hdr(skb);
/* packet with broadcast indication but unicast recipient */
if (!is_broadcast_ether_addr(ethhdr->h_dest))
- goto out;
+ goto free_skb;
- /* packet with broadcast sender address */
- if (is_broadcast_ether_addr(ethhdr->h_source))
- goto out;
+ /* packet with broadcast/multicast sender address */
+ if (is_multicast_ether_addr(ethhdr->h_source))
+ goto free_skb;
/* ignore broadcasts sent by myself */
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
- goto out;
+ goto free_skb;
bcast_packet = (struct batadv_bcast_packet *)skb->data;
/* ignore broadcasts originated by myself */
if (batadv_is_my_mac(bat_priv, bcast_packet->orig))
- goto out;
+ goto free_skb;
if (bcast_packet->ttl < 2)
- goto out;
+ goto free_skb;
orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig);
if (!orig_node)
- goto out;
+ goto free_skb;
spin_lock_bh(&orig_node->bcast_seqno_lock);
@@ -1165,18 +1207,18 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
/* check whether this has been sent by another originator before */
if (batadv_bla_check_bcast_duplist(bat_priv, skb))
- goto out;
+ goto free_skb;
batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
/* rebroadcast packet */
- batadv_add_bcast_packet_to_list(bat_priv, skb, 1);
+ batadv_add_bcast_packet_to_list(bat_priv, skb, 1, false);
/* don't hand the broadcast up if it is from an originator
* from the same backbone.
*/
if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size))
- goto out;
+ goto free_skb;
if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size))
goto rx_success;
@@ -1192,6 +1234,8 @@ rx_success:
spin_unlock:
spin_unlock_bh(&orig_node->bcast_seqno_lock);
+free_skb:
+ kfree_skb(skb);
out:
if (orig_node)
batadv_orig_node_put(orig_node);
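The routing.c hunks above all converge on one ownership rule: a send helper
consumes the skb on every path, the caller NULLs its pointer right after the
call, and every exit funnels through a single trailing kfree_skb(), which is
a no-op on NULL. A minimal sketch of the shape, assuming hypothetical helpers
(sanity_check, lookup_orig and send_consuming are illustrative, not from this
tree):

	static int rx_sketch(struct sk_buff *skb)
	{
		struct batadv_orig_node *node;
		int ret = NET_RX_DROP;

		if (sanity_check(skb) < 0)
			goto free_skb;		/* skb is still ours */

		node = lookup_orig(skb);
		if (!node)
			goto free_skb;

		ret = send_consuming(skb, node);	/* always consumes skb */
		skb = NULL;				/* tail free becomes a no-op */

		batadv_orig_node_put(node);
	free_skb:
		kfree_skb(skb);		/* kfree_skb(NULL) is safe */
		return ret;
	}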
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 8d4e1f578574..49021b7124f3 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -19,6 +19,7 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -64,8 +65,11 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
* If neigh_node is NULL, then the packet is broadcasted using hard_iface,
* otherwise it is sent as unicast to the given neighbor.
*
- * Return: NET_TX_DROP in case of error or the result of dev_queue_xmit(skb)
- * otherwise
+ * Regardless of the return value, the skb is consumed.
+ *
+ * Return: A negative errno code is returned on a failure. A success does not
+ * guarantee the frame will be transmitted as it may be dropped due
+ * to congestion or traffic shaping.
*/
int batadv_send_skb_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
@@ -73,7 +77,6 @@ int batadv_send_skb_packet(struct sk_buff *skb,
{
struct batadv_priv *bat_priv;
struct ethhdr *ethhdr;
- int ret;
bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -111,15 +114,8 @@ int batadv_send_skb_packet(struct sk_buff *skb,
/* dev_queue_xmit() returns a negative result on error. However on
* congestion and traffic shaping, it drops and returns NET_XMIT_DROP
* (which is > 0). This will not be treated as an error.
- *
- * a negative value cannot be returned because it could be interepreted
- * as not consumed skb by callers of batadv_send_skb_to_orig.
*/
- ret = dev_queue_xmit(skb);
- if (ret < 0)
- ret = NET_XMIT_DROP;
-
- return ret;
+ return dev_queue_xmit(skb);
send_skb_err:
kfree_skb(skb);
return NET_XMIT_DROP;
@@ -165,11 +161,9 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
* host, NULL can be passed as recv_if and no interface alternating is
* attempted.
*
- * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the
- * skb is buffered for later transmit or the NET_XMIT status returned by the
+ * Return: negative errno code on a failure, -EINPROGRESS if the skb is
+ * buffered for later transmit or the NET_XMIT status returned by the
* lower routine if the packet has been passed down.
- *
- * If the returning value is not -1 the skb has been consumed.
*/
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
@@ -177,12 +171,14 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = orig_node->bat_priv;
struct batadv_neigh_node *neigh_node;
- int ret = -1;
+ int ret;
/* batadv_find_router() increases neigh_nodes refcount if found. */
neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
- if (!neigh_node)
- goto out;
+ if (!neigh_node) {
+ ret = -EINVAL;
+ goto free_skb;
+ }
/* Check if the skb is too large to send in one piece and fragment
* it if needed.
@@ -191,8 +187,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
skb->len > neigh_node->if_incoming->net_dev->mtu) {
/* Fragment and send packet. */
ret = batadv_frag_send_packet(skb, orig_node, neigh_node);
+ /* skb was consumed */
+ skb = NULL;
- goto out;
+ goto put_neigh_node;
}
/* try to network code the packet, if it is received on an interface
@@ -204,9 +202,13 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
else
ret = batadv_send_unicast_skb(skb, neigh_node);
-out:
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
+ /* skb was consumed */
+ skb = NULL;
+
+put_neigh_node:
+ batadv_neigh_node_put(neigh_node);
+free_skb:
+ kfree_skb(skb);
return ret;
}
@@ -327,7 +329,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
{
struct batadv_unicast_packet *unicast_packet;
struct ethhdr *ethhdr;
- int res, ret = NET_XMIT_DROP;
+ int ret = NET_XMIT_DROP;
if (!orig_node)
goto out;
@@ -364,13 +366,12 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid))
unicast_packet->ttvn = unicast_packet->ttvn - 1;
- res = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (res != -1)
- ret = NET_XMIT_SUCCESS;
+ ret = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ /* skb was consumed */
+ skb = NULL;
out:
- if (ret == NET_XMIT_DROP)
- kfree_skb(skb);
+ kfree_skb(skb);
return ret;
}
@@ -451,13 +452,19 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
/**
* batadv_forw_packet_free - free a forwarding packet
* @forw_packet: The packet to free
+ * @dropped: whether the packet is freed because it is dropped
*
* This frees a forwarding packet and releases any resources it might
* have claimed.
*/
-void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
+void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
+ bool dropped)
{
- kfree_skb(forw_packet->skb);
+ if (dropped)
+ kfree_skb(forw_packet->skb);
+ else
+ consume_skb(forw_packet->skb);
+
if (forw_packet->if_incoming)
batadv_hardif_put(forw_packet->if_incoming);
if (forw_packet->if_outgoing)
@@ -514,6 +521,8 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
if (if_outgoing)
kref_get(&if_outgoing->refcount);
+ INIT_HLIST_NODE(&forw_packet->list);
+ INIT_HLIST_NODE(&forw_packet->cleanup_list);
forw_packet->skb = NULL;
forw_packet->queue_left = queue_left;
forw_packet->if_incoming = if_incoming;
@@ -529,19 +538,191 @@ err:
return NULL;
}
+/**
+ * batadv_forw_packet_was_stolen - check whether someone stole this packet
+ * @forw_packet: the forwarding packet to check
+ *
+ * This function checks whether the given forwarding packet was claimed by
+ * someone else for free().
+ *
+ * Return: True if someone stole it, false otherwise.
+ */
+static bool
+batadv_forw_packet_was_stolen(struct batadv_forw_packet *forw_packet)
+{
+ return !hlist_unhashed(&forw_packet->cleanup_list);
+}
+
+/**
+ * batadv_forw_packet_steal - claim a forw_packet for free()
+ * @forw_packet: the forwarding packet to steal
+ * @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock)
+ *
+ * This function tries to steal a specific forw_packet from global
+ * visibility for the purpose of getting it for free(). That means
+ * the caller is *not* allowed to requeue it afterwards.
+ *
+ * Return: True if stealing was successful. False if someone else stole it
+ * before us.
+ */
+bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet,
+ spinlock_t *lock)
+{
+ /* did purging routine steal it earlier? */
+ spin_lock_bh(lock);
+ if (batadv_forw_packet_was_stolen(forw_packet)) {
+ spin_unlock_bh(lock);
+ return false;
+ }
+
+ hlist_del_init(&forw_packet->list);
+
+ /* Just to spot misuse of this function */
+ hlist_add_fake(&forw_packet->cleanup_list);
+
+ spin_unlock_bh(lock);
+ return true;
+}
+
+/**
+ * batadv_forw_packet_list_steal - claim a list of forward packets for free()
+ * @forw_list: the to be stolen forward packets
+ * @cleanup_list: a backup pointer, to be able to dispose the packet later
+ * @hard_iface: the interface to steal forward packets from
+ *
+ * This function claims responsibility to free any forw_packet queued on the
+ * given hard_iface. If hard_iface is NULL forwarding packets on all hard
+ * interfaces will be claimed.
+ *
+ * The packets are moved from the forw_list to the cleanup_list, which
+ * allows already running threads to notice the claim.
+ */
static void
-_batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
- struct batadv_forw_packet *forw_packet,
- unsigned long send_time)
+batadv_forw_packet_list_steal(struct hlist_head *forw_list,
+ struct hlist_head *cleanup_list,
+ const struct batadv_hard_iface *hard_iface)
{
- /* add new packet to packet list */
- spin_lock_bh(&bat_priv->forw_bcast_list_lock);
- hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list);
- spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
+ struct batadv_forw_packet *forw_packet;
+ struct hlist_node *safe_tmp_node;
+
+ hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
+ forw_list, list) {
+ /* if purge_outstanding_packets() was called with an argument
+ * we delete only packets belonging to the given interface
+ */
+ if (hard_iface &&
+ (forw_packet->if_incoming != hard_iface) &&
+ (forw_packet->if_outgoing != hard_iface))
+ continue;
+
+ hlist_del(&forw_packet->list);
+ hlist_add_head(&forw_packet->cleanup_list, cleanup_list);
+ }
+}
+
+/**
+ * batadv_forw_packet_list_free - free a list of forward packets
+ * @head: a list of to be freed forw_packets
+ *
+ * This function cancels the scheduling of any packet in the provided list,
+ * waits for any possibly running packet forwarding thread to finish and
+ * finally, safely frees these forward packets.
+ *
+ * This function might sleep.
+ */
+static void batadv_forw_packet_list_free(struct hlist_head *head)
+{
+ struct batadv_forw_packet *forw_packet;
+ struct hlist_node *safe_tmp_node;
+
+ hlist_for_each_entry_safe(forw_packet, safe_tmp_node, head,
+ cleanup_list) {
+ cancel_delayed_work_sync(&forw_packet->delayed_work);
+
+ hlist_del(&forw_packet->cleanup_list);
+ batadv_forw_packet_free(forw_packet, true);
+ }
+}
+
+/**
+ * batadv_forw_packet_queue - try to queue a forwarding packet
+ * @forw_packet: the forwarding packet to queue
+ * @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock)
+ * @head: the shelf to queue it on (e.g. forw_{bat,bcast}_list)
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ *
+ * This function tries to (re)queue a forwarding packet. Requeuing
+ * is prevented if the according interface is shutting down
+ * (e.g. if batadv_forw_packet_list_steal() was called for this
+ * packet earlier).
+ *
+ * Calling batadv_forw_packet_queue() after a call to
+ * batadv_forw_packet_steal() is forbidden!
+ *
+ * Caller needs to ensure that forw_packet->delayed_work was initialized.
+ */
+static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet,
+ spinlock_t *lock, struct hlist_head *head,
+ unsigned long send_time)
+{
+ spin_lock_bh(lock);
+
+ /* did purging routine steal it from us? */
+ if (batadv_forw_packet_was_stolen(forw_packet)) {
+ /* If you got it for free() without trouble, then
+ * don't get back into the queue after stealing...
+ */
+ WARN_ONCE(hlist_fake(&forw_packet->cleanup_list),
+ "Requeuing after batadv_forw_packet_steal() not allowed!\n");
- /* start timer for this packet */
- queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work,
- send_time);
+ spin_unlock_bh(lock);
+ return;
+ }
+
+ hlist_del_init(&forw_packet->list);
+ hlist_add_head(&forw_packet->list, head);
+
+ queue_delayed_work(batadv_event_workqueue,
+ &forw_packet->delayed_work,
+ send_time - jiffies);
+ spin_unlock_bh(lock);
+}
+
+/**
+ * batadv_forw_packet_bcast_queue - try to queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @forw_packet: the forwarding packet to queue
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ *
+ * This function tries to (re)queue a broadcast packet.
+ *
+ * Caller needs to ensure that forw_packet->delayed_work was initialized.
+ */
+static void
+batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv,
+ struct batadv_forw_packet *forw_packet,
+ unsigned long send_time)
+{
+ batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bcast_list_lock,
+ &bat_priv->forw_bcast_list, send_time);
+}
+
+/**
+ * batadv_forw_packet_ogmv1_queue - try to queue an OGMv1 packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @forw_packet: the forwarding packet to queue
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ *
+ * This function tries to (re)queue an OGMv1 packet.
+ *
+ * Caller needs to ensure that forw_packet->delayed_work was initialized.
+ */
+void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
+ struct batadv_forw_packet *forw_packet,
+ unsigned long send_time)
+{
+ batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bat_list_lock,
+ &bat_priv->forw_bat_list, send_time);
}
/**
@@ -549,6 +730,7 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @skb: broadcast packet to add
* @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
*
* add a broadcast packet to the queue and setup timers. broadcast packets
* are sent multiple times to increase probability for being received.
@@ -560,9 +742,10 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
*/
int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
const struct sk_buff *skb,
- unsigned long delay)
+ unsigned long delay,
+ bool own_packet)
{
- struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_hard_iface *primary_if;
struct batadv_forw_packet *forw_packet;
struct batadv_bcast_packet *bcast_packet;
struct sk_buff *newskb;
@@ -586,18 +769,17 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
bcast_packet = (struct batadv_bcast_packet *)newskb->data;
bcast_packet->ttl--;
- skb_reset_mac_header(newskb);
-
forw_packet->skb = newskb;
+ forw_packet->own = own_packet;
INIT_DELAYED_WORK(&forw_packet->delayed_work,
batadv_send_outstanding_bcast_packet);
- _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
+ batadv_forw_packet_bcast_queue(bat_priv, forw_packet, jiffies + delay);
return NETDEV_TX_OK;
err_packet_free:
- batadv_forw_packet_free(forw_packet);
+ batadv_forw_packet_free(forw_packet, true);
err:
return NETDEV_TX_BUSY;
}
@@ -605,11 +787,18 @@ err:
static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
{
struct batadv_hard_iface *hard_iface;
+ struct batadv_hardif_neigh_node *neigh_node;
struct delayed_work *delayed_work;
struct batadv_forw_packet *forw_packet;
+ struct batadv_bcast_packet *bcast_packet;
struct sk_buff *skb1;
struct net_device *soft_iface;
struct batadv_priv *bat_priv;
+ unsigned long send_time = jiffies + msecs_to_jiffies(5);
+ bool dropped = false;
+ u8 *neigh_addr;
+ u8 *orig_neigh;
+ int ret = 0;
delayed_work = to_delayed_work(work);
forw_packet = container_of(delayed_work, struct batadv_forw_packet,
@@ -617,15 +806,17 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
soft_iface = forw_packet->if_incoming->soft_iface;
bat_priv = netdev_priv(soft_iface);
- spin_lock_bh(&bat_priv->forw_bcast_list_lock);
- hlist_del(&forw_packet->list);
- spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
-
- if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
+ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) {
+ dropped = true;
goto out;
+ }
- if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet))
+ if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) {
+ dropped = true;
goto out;
+ }
+
+ bcast_packet = (struct batadv_bcast_packet *)forw_packet->skb->data;
/* rebroadcast packet */
rcu_read_lock();
@@ -636,6 +827,49 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
if (forw_packet->num_packets >= hard_iface->num_bcasts)
continue;
+ if (forw_packet->own) {
+ neigh_node = NULL;
+ } else {
+ neigh_addr = eth_hdr(forw_packet->skb)->h_source;
+ neigh_node = batadv_hardif_neigh_get(hard_iface,
+ neigh_addr);
+ }
+
+ orig_neigh = neigh_node ? neigh_node->orig : NULL;
+
+ ret = batadv_hardif_no_broadcast(hard_iface, bcast_packet->orig,
+ orig_neigh);
+
+ if (ret) {
+ char *type;
+
+ switch (ret) {
+ case BATADV_HARDIF_BCAST_NORECIPIENT:
+ type = "no neighbor";
+ break;
+ case BATADV_HARDIF_BCAST_DUPFWD:
+ type = "single neighbor is source";
+ break;
+ case BATADV_HARDIF_BCAST_DUPORIG:
+ type = "single neighbor is originator";
+ break;
+ default:
+ type = "unknown";
+ }
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n",
+ bcast_packet->orig,
+ hard_iface->net_dev->name, type);
+
+ if (neigh_node)
+ batadv_hardif_neigh_put(neigh_node);
+
+ continue;
+ }
+
+ if (neigh_node)
+ batadv_hardif_neigh_put(neigh_node);
+
if (!kref_get_unless_zero(&hard_iface->refcount))
continue;
@@ -652,22 +886,34 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
/* if we still have some more bcasts to send */
if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) {
- _batadv_add_bcast_packet_to_list(bat_priv, forw_packet,
- msecs_to_jiffies(5));
+ batadv_forw_packet_bcast_queue(bat_priv, forw_packet,
+ send_time);
return;
}
out:
- batadv_forw_packet_free(forw_packet);
+ /* do we get something for free()? */
+ if (batadv_forw_packet_steal(forw_packet,
+ &bat_priv->forw_bcast_list_lock))
+ batadv_forw_packet_free(forw_packet, dropped);
}
+/**
+ * batadv_purge_outstanding_packets - stop/purge scheduled bcast/OGMv1 packets
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on
+ *
+ * This method cancels and purges any broadcast and OGMv1 packet on the given
+ * hard_iface. If hard_iface is NULL, broadcast and OGMv1 packets on all hard
+ * interfaces will be canceled and purged.
+ *
+ * This function might sleep.
+ */
void
batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
const struct batadv_hard_iface *hard_iface)
{
- struct batadv_forw_packet *forw_packet;
- struct hlist_node *safe_tmp_node;
- bool pending;
+ struct hlist_head head = HLIST_HEAD_INIT;
if (hard_iface)
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -677,57 +923,18 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"purge_outstanding_packets()\n");
- /* free bcast list */
+ /* claim bcast list for free() */
spin_lock_bh(&bat_priv->forw_bcast_list_lock);
- hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
- &bat_priv->forw_bcast_list, list) {
- /* if purge_outstanding_packets() was called with an argument
- * we delete only packets belonging to the given interface
- */
- if ((hard_iface) &&
- (forw_packet->if_incoming != hard_iface) &&
- (forw_packet->if_outgoing != hard_iface))
- continue;
-
- spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
-
- /* batadv_send_outstanding_bcast_packet() will lock the list to
- * delete the item from the list
- */
- pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
- spin_lock_bh(&bat_priv->forw_bcast_list_lock);
-
- if (pending) {
- hlist_del(&forw_packet->list);
- batadv_forw_packet_free(forw_packet);
- }
- }
+ batadv_forw_packet_list_steal(&bat_priv->forw_bcast_list, &head,
+ hard_iface);
spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
- /* free batman packet list */
+ /* claim batman packet list for free() */
spin_lock_bh(&bat_priv->forw_bat_list_lock);
- hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
- &bat_priv->forw_bat_list, list) {
- /* if purge_outstanding_packets() was called with an argument
- * we delete only packets belonging to the given interface
- */
- if ((hard_iface) &&
- (forw_packet->if_incoming != hard_iface) &&
- (forw_packet->if_outgoing != hard_iface))
- continue;
-
- spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
- /* send_outstanding_bat_packet() will lock the list to
- * delete the item from the list
- */
- pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
- spin_lock_bh(&bat_priv->forw_bat_list_lock);
-
- if (pending) {
- hlist_del(&forw_packet->list);
- batadv_forw_packet_free(forw_packet);
- }
- }
+ batadv_forw_packet_list_steal(&bat_priv->forw_bat_list, &head,
+ hard_iface);
spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+ /* then cancel or wait for packet workers to finish and free */
+ batadv_forw_packet_list_free(&head);
}
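The steal marker above leans on three hlist primitives: INIT_HLIST_NODE()
leaves pprev NULL, so hlist_unhashed() reports true; hlist_add_fake() points
pprev at the node's own next field, so hlist_unhashed() turns false without
the node joining any list; hlist_fake() detects exactly that self-reference.
A self-contained sketch of the invariant (illustrative, not from the patch):

	#include <linux/bug.h>
	#include <linux/list.h>

	static void steal_marker_sketch(struct hlist_node *n)
	{
		INIT_HLIST_NODE(n);		/* pprev == NULL          */
		WARN_ON(!hlist_unhashed(n));	/* not stolen yet         */

		hlist_add_fake(n);		/* pprev = &n->next       */
		WARN_ON(hlist_unhashed(n));	/* now reads as "stolen"  */
		WARN_ON(!hlist_fake(n));	/* ...and as a fake entry */
	}

In batadv_forw_packet_list_steal() the node lands on a real cleanup list
instead, so batadv_forw_packet_was_stolen() is true either way, while
hlist_fake() singles out the forbidden requeue-after-steal case for the
WARN_ONCE() in batadv_forw_packet_queue().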
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 999f78683d9e..a94e1e8639ca 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -21,18 +21,24 @@
#include "main.h"
#include <linux/compiler.h>
+#include <linux/spinlock.h>
#include <linux/types.h>
#include "packet.h"
struct sk_buff;
-void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet);
+void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
+ bool dropped);
struct batadv_forw_packet *
batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing,
atomic_t *queue_left,
struct batadv_priv *bat_priv);
+bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l);
+void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
+ struct batadv_forw_packet *forw_packet,
+ unsigned long send_time);
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
@@ -46,7 +52,8 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node);
int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
const struct sk_buff *skb,
- unsigned long delay);
+ unsigned long delay,
+ bool own_packet);
void
batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
const struct batadv_hard_iface *hard_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 49e16b6e0ba3..7b3494ae6ad9 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -22,6 +22,7 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
@@ -116,6 +117,26 @@ static int batadv_interface_release(struct net_device *dev)
return 0;
}
+/**
+ * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
+ * @bat_priv: the bat priv with all the soft interface information
+ * @idx: index of counter to sum up
+ *
+ * Return: sum of all cpu-local counters
+ */
+static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
+{
+ u64 *counters, sum = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ counters = per_cpu_ptr(bat_priv->bat_counters, cpu);
+ sum += counters[idx];
+ }
+
+ return sum;
+}
+
static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
@@ -336,12 +357,12 @@ send:
seqno = atomic_inc_return(&bat_priv->bcast_seqno);
bcast_packet->seqno = htonl(seqno);
- batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay);
+ batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay, true);
/* a copy is stored in the bcast list, therefore removing
* the original skb.
*/
- kfree_skb(skb);
+ consume_skb(skb);
/* unicast packet */
} else {
@@ -365,7 +386,7 @@ send:
ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint,
vid);
}
- if (ret == NET_XMIT_DROP)
+ if (ret != NET_XMIT_SUCCESS)
goto dropped_freed;
}
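batadv_sum_counter() is the usual lockless per-cpu statistics read: each CPU
bumps its own slot, and a reader sums over all possible CPUs for a snapshot.
A hedged usage sketch, assuming it feeds the stats callback it was moved next
to (the BATADV_CNT_* indices exist in main.h; the surrounding assignments are
illustrative):

	stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX);
	stats->tx_bytes   = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES);
	stats->rx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_RX);
	stats->rx_bytes   = batadv_sum_counter(bat_priv, BATADV_CNT_RX_BYTES);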
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 02d96f224c60..17c844196eb2 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -33,7 +33,6 @@
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
-#include <linux/stat.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/stringify.h>
@@ -666,41 +665,36 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
return count;
}
-BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
-BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_SIF_BOOL(aggregated_ogms, 0644, NULL);
+BATADV_ATTR_SIF_BOOL(bonding, 0644, NULL);
#ifdef CONFIG_BATMAN_ADV_BLA
-BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR,
- batadv_bla_status_update);
+BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, 0644, batadv_bla_status_update);
#endif
#ifdef CONFIG_BATMAN_ADV_DAT
-BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR,
- batadv_dat_status_update);
+BATADV_ATTR_SIF_BOOL(distributed_arp_table, 0644, batadv_dat_status_update);
#endif
-BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu);
-static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL);
-static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode,
- batadv_store_gw_mode);
-BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR,
- 2 * BATADV_JITTER, INT_MAX, NULL);
-BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
- BATADV_TQ_MAX_VALUE, NULL);
-static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class,
+BATADV_ATTR_SIF_BOOL(fragmentation, 0644, batadv_update_min_mtu);
+static BATADV_ATTR(routing_algo, 0444, batadv_show_bat_algo, NULL);
+static BATADV_ATTR(gw_mode, 0644, batadv_show_gw_mode, batadv_store_gw_mode);
+BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, 0644, 2 * BATADV_JITTER,
+ INT_MAX, NULL);
+BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, 0644, 0, BATADV_TQ_MAX_VALUE,
+ NULL);
+static BATADV_ATTR(gw_sel_class, 0644, batadv_show_gw_sel_class,
batadv_store_gw_sel_class);
-static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
+static BATADV_ATTR(gw_bandwidth, 0644, batadv_show_gw_bwidth,
batadv_store_gw_bwidth);
#ifdef CONFIG_BATMAN_ADV_MCAST
-BATADV_ATTR_SIF_BOOL(multicast_mode, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_SIF_BOOL(multicast_mode, 0644, NULL);
#endif
#ifdef CONFIG_BATMAN_ADV_DEBUG
-BATADV_ATTR_SIF_UINT(log_level, log_level, S_IRUGO | S_IWUSR, 0,
- BATADV_DBG_ALL, NULL);
+BATADV_ATTR_SIF_UINT(log_level, log_level, 0644, 0, BATADV_DBG_ALL, NULL);
#endif
#ifdef CONFIG_BATMAN_ADV_NC
-BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR,
- batadv_nc_status_update);
+BATADV_ATTR_SIF_BOOL(network_coding, 0644, batadv_nc_status_update);
#endif
-static BATADV_ATTR(isolation_mark, S_IRUGO | S_IWUSR,
- batadv_show_isolation_mark, batadv_store_isolation_mark);
+static BATADV_ATTR(isolation_mark, 0644, batadv_show_isolation_mark,
+ batadv_store_isolation_mark);
static struct batadv_attribute *batadv_mesh_attrs[] = {
&batadv_attr_aggregated_ogms,
@@ -731,7 +725,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
NULL,
};
-BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_VLAN_BOOL(ap_isolation, 0644, NULL);
/* array of vlan specific sysfs attributes */
static struct batadv_attribute *batadv_vlan_attrs[] = {
@@ -1116,14 +1110,13 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj,
#endif
-static BATADV_ATTR(mesh_iface, S_IRUGO | S_IWUSR, batadv_show_mesh_iface,
+static BATADV_ATTR(mesh_iface, 0644, batadv_show_mesh_iface,
batadv_store_mesh_iface);
-static BATADV_ATTR(iface_status, S_IRUGO, batadv_show_iface_status, NULL);
+static BATADV_ATTR(iface_status, 0444, batadv_show_iface_status, NULL);
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
-BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, S_IRUGO | S_IWUSR,
+BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, 0644,
2 * BATADV_JITTER, INT_MAX, NULL);
-static BATADV_ATTR(throughput_override, S_IRUGO | S_IWUSR,
- batadv_show_throughput_override,
+static BATADV_ATTR(throughput_override, 0644, batadv_show_throughput_override,
batadv_store_throughput_override);
#endif
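The permission churn above is mechanical; checkpatch now prefers plain octal
over the symbolic macros. The two mappings used throughout:

	S_IRUGO           == 0444	/* read for user, group, other */
	S_IRUGO | S_IWUSR == 0644	/* plus write for the owner    */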
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8af1611b8ab2..981e8c5b07e9 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -615,9 +615,6 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
batadv_tp_fill_prerandom(tp_vars, data, data_len);
r = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (r == -1)
- kfree_skb(skb);
-
if (r == NET_XMIT_SUCCESS)
return 0;
@@ -1207,9 +1204,6 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
/* send the ack */
r = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (r == -1)
- kfree_skb(skb);
-
if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
ret = BATADV_TP_REASON_DST_UNREACHABLE;
goto out;
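Both tp_meter hunks show the caller side of the new send contract: the
kfree_skb() on the old magic -1 return is gone because
batadv_send_skb_to_orig() now consumes the skb on every path, leaving the
caller to interpret status only, e.g.:

	r = batadv_send_skb_to_orig(skb, orig_node, NULL);
	/* no kfree_skb(skb) here anymore -- the callee owns the skb */
	if (r == NET_XMIT_SUCCESS)
		return 0;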
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0dc85eb1cb7a..30ecbfb40adf 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -56,7 +56,6 @@
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
-#include "multicast.h"
#include "netlink.h"
#include "originator.h"
#include "packet.h"
@@ -647,6 +646,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
struct net *net = dev_net(soft_iface);
struct batadv_softif_vlan *vlan;
struct net_device *in_dev = NULL;
+ struct batadv_hard_iface *in_hardif = NULL;
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry;
int hash_added, table_size, packet_size_max;
@@ -658,6 +658,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (ifindex != BATADV_NULL_IFINDEX)
in_dev = dev_get_by_index(net, ifindex);
+ if (in_dev)
+ in_hardif = batadv_hardif_get_by_netdev(in_dev);
+
tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!is_multicast_ether_addr(addr))
@@ -731,7 +734,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
*/
tt_local->common.flags = BATADV_TT_CLIENT_NEW;
tt_local->common.vid = vid;
- if (batadv_is_wifi_netdev(in_dev))
+ if (batadv_is_wifi_hardif(in_hardif))
tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
kref_init(&tt_local->common.refcount);
tt_local->last_seen = jiffies;
@@ -791,7 +794,7 @@ check_roaming:
*/
remote_flags = tt_local->common.flags & BATADV_TT_REMOTE_MASK;
- if (batadv_is_wifi_netdev(in_dev))
+ if (batadv_is_wifi_hardif(in_hardif))
tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
else
tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI;
@@ -815,6 +818,8 @@ check_roaming:
ret = true;
out:
+ if (in_hardif)
+ batadv_hardif_put(in_hardif);
if (in_dev)
dev_put(in_dev);
if (tt_local)
@@ -3795,9 +3800,6 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
{
lockdep_assert_held(&bat_priv->tt.commit_lock);
- /* Update multicast addresses in local translation table */
- batadv_mcast_mla_update(bat_priv);
-
if (atomic_read(&bat_priv->tt.local_changes) < 1) {
if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))
batadv_tt_tvlv_container_update(bat_priv);
@@ -3835,8 +3837,8 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
unsigned short vid)
{
- struct batadv_tt_local_entry *tt_local_entry = NULL;
- struct batadv_tt_global_entry *tt_global_entry = NULL;
+ struct batadv_tt_local_entry *tt_local_entry;
+ struct batadv_tt_global_entry *tt_global_entry;
struct batadv_softif_vlan *vlan;
bool ret = false;
@@ -3845,27 +3847,24 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
return false;
if (!atomic_read(&vlan->ap_isolation))
- goto out;
+ goto vlan_put;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid);
if (!tt_local_entry)
- goto out;
+ goto vlan_put;
tt_global_entry = batadv_tt_global_hash_find(bat_priv, src, vid);
if (!tt_global_entry)
- goto out;
+ goto local_entry_put;
- if (!_batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
- goto out;
-
- ret = true;
+ if (_batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
+ ret = true;
-out:
+ batadv_tt_global_entry_put(tt_global_entry);
+local_entry_put:
+ batadv_tt_local_entry_put(tt_local_entry);
+vlan_put:
batadv_softif_vlan_put(vlan);
- if (tt_global_entry)
- batadv_tt_global_entry_put(tt_global_entry);
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
return ret;
}
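batadv_is_ap_isolated() now follows the kernel's staged-unwind idiom: take
references in order, release them in reverse through fall-through labels, so
no exit path needs a NULL check. The generic shape, with illustrative names:

	a = get_a();
	if (!a)
		return false;

	b = get_b(a);
	if (!b)
		goto put_a;

	ret = use(a, b);

	put_ref_b(b);
put_a:
	put_ref_a(a);
	return ret;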
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 77654f055f24..a783420356ae 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -600,7 +600,6 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
unsigned char *tvlv_buff;
unsigned int tvlv_len;
ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
- int res;
orig_node = batadv_orig_hash_find(bat_priv, dst);
if (!orig_node)
@@ -633,9 +632,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
tvlv_buff += sizeof(*tvlv_hdr);
memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
- res = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (res == -1)
- kfree_skb(skb);
+ batadv_send_skb_to_orig(skb, orig_node, NULL);
out:
batadv_orig_node_put(orig_node);
}
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index b3dd1a381aad..e913aee28c98 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -119,12 +119,28 @@ struct batadv_hard_iface_bat_v {
};
/**
+ * enum batadv_hard_iface_wifi_flags - Flags describing the wifi configuration
+ * of a batadv_hard_iface
+ * @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device
+ * @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device
+ * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device
+ * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi device
+ */
+enum batadv_hard_iface_wifi_flags {
+ BATADV_HARDIF_WIFI_WEXT_DIRECT = BIT(0),
+ BATADV_HARDIF_WIFI_CFG80211_DIRECT = BIT(1),
+ BATADV_HARDIF_WIFI_WEXT_INDIRECT = BIT(2),
+ BATADV_HARDIF_WIFI_CFG80211_INDIRECT = BIT(3),
+};
+
+/**
* struct batadv_hard_iface - network device known to batman-adv
* @list: list node for batadv_hardif_list
* @if_num: identificator of the interface
* @if_status: status of the interface for batman-adv
- * @net_dev: pointer to the net_device
* @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
+ * @wifi_flags: flags whether this is (directly or indirectly) a wifi interface
+ * @net_dev: pointer to the net_device
* @hardif_obj: kobject of the per interface sysfs "mesh" directory
* @refcount: number of contexts the object is used
* @batman_adv_ptype: packet type describing packets that should be processed by
@@ -141,8 +157,9 @@ struct batadv_hard_iface {
struct list_head list;
s16 if_num;
char if_status;
- struct net_device *net_dev;
u8 num_bcasts;
+ u32 wifi_flags;
+ struct net_device *net_dev;
struct kobject *hardif_obj;
struct kref refcount;
struct packet_type batman_adv_ptype;
@@ -184,7 +201,7 @@ struct batadv_orig_ifinfo {
/**
* struct batadv_frag_table_entry - head in the fragment buffer table
- * @head: head of list with fragments
+ * @fragment_list: head of list with fragments
* @lock: lock to protect the list of fragments
* @timestamp: time (jiffie) of last received fragment
* @seqno: sequence number of the fragments in the list
@@ -192,8 +209,8 @@ struct batadv_orig_ifinfo {
* @total_size: expected size of the assembled packet
*/
struct batadv_frag_table_entry {
- struct hlist_head head;
- spinlock_t lock; /* protects head */
+ struct hlist_head fragment_list;
+ spinlock_t lock; /* protects fragment_list */
unsigned long timestamp;
u16 seqno;
u16 size;
@@ -408,6 +425,7 @@ struct batadv_hardif_neigh_node_bat_v {
* struct batadv_hardif_neigh_node - unique neighbor per hard-interface
* @list: list node for batadv_hard_iface::neigh_list
* @addr: the MAC address of the neighboring interface
+ * @orig: the address of the originator this neighbor node belongs to
* @if_incoming: pointer to incoming hard-interface
* @last_seen: when last packet via this neighbor was received
* @bat_v: B.A.T.M.A.N. V private data
@@ -417,6 +435,7 @@ struct batadv_hardif_neigh_node_bat_v {
struct batadv_hardif_neigh_node {
struct hlist_node list;
u8 addr[ETH_ALEN];
+ u8 orig[ETH_ALEN];
struct batadv_hard_iface *if_incoming;
unsigned long last_seen;
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
@@ -706,8 +725,8 @@ struct batadv_priv_debug_log {
/**
* struct batadv_priv_gw - per mesh interface gateway data
- * @list: list of available gateway nodes
- * @list_lock: lock protecting gw_list & curr_gw
+ * @gateway_list: list of available gateway nodes
+ * @list_lock: lock protecting gateway_list & curr_gw
* @curr_gw: pointer to currently selected gateway node
* @mode: gateway operation: off, client or server (see batadv_gw_modes)
* @sel_class: gateway selection class (applies if gw_mode client)
@@ -716,8 +735,8 @@ struct batadv_priv_debug_log {
* @reselect: bool indicating a gateway re-selection is in progress
*/
struct batadv_priv_gw {
- struct hlist_head list;
- spinlock_t list_lock; /* protects gw_list & curr_gw */
+ struct hlist_head gateway_list;
+ spinlock_t list_lock; /* protects gateway_list & curr_gw */
struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */
atomic_t mode;
atomic_t sel_class;
@@ -785,9 +804,10 @@ struct batadv_mcast_querier_state {
* @num_want_all_ipv6: counter for items in want_all_ipv6_list
* @want_lists_lock: lock for protecting modifications to mcast want lists
* (traversals are rcu-locked)
+ * @work: work queue callback item for multicast TT and TVLV updates
*/
struct batadv_priv_mcast {
- struct hlist_head mla_list;
+ struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */
struct hlist_head want_all_unsnoopables_list;
struct hlist_head want_all_ipv4_list;
struct hlist_head want_all_ipv6_list;
@@ -802,6 +822,7 @@ struct batadv_priv_mcast {
atomic_t num_want_all_ipv6;
/* protects want_all_{unsnoopables,ipv4,ipv6}_list */
spinlock_t want_lists_lock;
+ struct delayed_work work;
};
#endif
@@ -1363,7 +1384,8 @@ struct batadv_skb_cb {
/**
* struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
- * @list: list node for batadv_socket_client::queue_list
+ * @list: list node for batadv_priv::forw_{bat,bcast}_list
+ * @cleanup_list: list node for purging functions
* @send_time: execution time for delayed_work (packet sending)
* @own: bool for locally generated packets (local OGMs are re-scheduled after
* sending)
@@ -1380,6 +1402,7 @@ struct batadv_skb_cb {
*/
struct batadv_forw_packet {
struct hlist_node list;
+ struct hlist_node cleanup_list;
unsigned long send_time;
u8 own;
struct sk_buff *skb;
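The new wifi_flags field splits "is wifi" into direct (the hard interface
itself is wireless) and indirect (a lower device in its link chain is). A
hedged helper sketch on top of the enum; only the flag names come from the
patch, the helper is made up:

	static bool is_direct_wifi_sketch(u32 wifi_flags)
	{
		return wifi_flags & (BATADV_HARDIF_WIFI_WEXT_DIRECT |
				     BATADV_HARDIF_WIFI_CFG80211_DIRECT);
	}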
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index b3ff12eb9b6d..4bfaa19a5573 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -20,5 +20,3 @@ bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
bluetooth-$(CONFIG_BT_LEDS) += leds.o
bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
-
-subdir-ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index f4fcb4a9d5c1..2b875edf77e1 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -211,7 +211,6 @@ static const struct net_device_ops bnep_netdev_ops = {
.ndo_set_rx_mode = bnep_net_set_mc_list,
.ndo_set_mac_address = bnep_net_set_mac_addr,
.ndo_tx_timeout = bnep_net_timeout,
- .ndo_change_mtu = eth_change_mtu,
};
@@ -222,6 +221,8 @@ void bnep_net_setup(struct net_device *dev)
dev->addr_len = ETH_ALEN;
ether_setup(dev);
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->netdev_ops = &bnep_netdev_ops;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 577f1c01454a..ce0b5dd01953 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2127,7 +2127,7 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
struct sk_buff **frag;
int sent = 0;
- if (copy_from_iter(skb_put(skb, count), count, &msg->msg_iter) != count)
+ if (!copy_from_iter_full(skb_put(skb, count), count, &msg->msg_iter))
return -EFAULT;
sent += count;
@@ -2147,8 +2147,8 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
*frag = tmp;
- if (copy_from_iter(skb_put(*frag, count), count,
- &msg->msg_iter) != count)
+ if (!copy_from_iter_full(skb_put(*frag, count), count,
+ &msg->msg_iter))
return -EFAULT;
sent += count;
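copy_from_iter_full() tightens the old pattern: it returns true only if all
count bytes were copied and, on failure, rewinds the iterator instead of
leaving it partially advanced, so the byte-count comparison disappears.
Minimal usage sketch (buf and count are placeholders):

	if (!copy_from_iter_full(buf, count, &msg->msg_iter))
		return -EFAULT;	/* iterator already rewound */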
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 43faf2aea2ab..fae391f1871f 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -57,7 +57,7 @@
#define SMP_TIMEOUT msecs_to_jiffies(30000)
#define AUTH_REQ_MASK(dev) (hci_dev_test_flag(dev, HCI_SC_ENABLED) ? \
- 0x1f : 0x07)
+ 0x3f : 0x07)
#define KEY_DIST_MASK 0x07
/* Maximum message length that can be passed to aes_cmac */
@@ -76,6 +76,7 @@ enum {
SMP_FLAG_DHKEY_PENDING,
SMP_FLAG_REMOTE_OOB,
SMP_FLAG_LOCAL_OOB,
+ SMP_FLAG_CT2,
};
struct smp_dev {
@@ -357,6 +358,22 @@ static int smp_h6(struct crypto_shash *tfm_cmac, const u8 w[16],
return err;
}
+static int smp_h7(struct crypto_shash *tfm_cmac, const u8 w[16],
+ const u8 salt[16], u8 res[16])
+{
+ int err;
+
+ SMP_DBG("w %16phN salt %16phN", w, salt);
+
+ err = aes_cmac(tfm_cmac, salt, w, 16, res);
+ if (err)
+ return err;
+
+ SMP_DBG("res %16phN", res);
+
+ return err;
+}
+
/* The following functions map to the legacy SMP crypto functions e, c1,
* s1 and ah.
*/
@@ -1130,20 +1147,31 @@ static void sc_add_ltk(struct smp_chan *smp)
static void sc_generate_link_key(struct smp_chan *smp)
{
- /* These constants are as specified in the core specification.
- * In ASCII they spell out to 'tmp1' and 'lebr'.
- */
- const u8 tmp1[4] = { 0x31, 0x70, 0x6d, 0x74 };
+ /* From core spec. Spells out in ASCII as 'lebr'. */
const u8 lebr[4] = { 0x72, 0x62, 0x65, 0x6c };
smp->link_key = kzalloc(16, GFP_KERNEL);
if (!smp->link_key)
return;
- if (smp_h6(smp->tfm_cmac, smp->tk, tmp1, smp->link_key)) {
- kzfree(smp->link_key);
- smp->link_key = NULL;
- return;
+ if (test_bit(SMP_FLAG_CT2, &smp->flags)) {
+ /* SALT = 0x00000000000000000000000000000000746D7031 */
+ const u8 salt[16] = { 0x31, 0x70, 0x6d, 0x74 };
+
+ if (smp_h7(smp->tfm_cmac, smp->tk, salt, smp->link_key)) {
+ kzfree(smp->link_key);
+ smp->link_key = NULL;
+ return;
+ }
+ } else {
+ /* From core spec. Spells out in ASCII as 'tmp1'. */
+ const u8 tmp1[4] = { 0x31, 0x70, 0x6d, 0x74 };
+
+ if (smp_h6(smp->tfm_cmac, smp->tk, tmp1, smp->link_key)) {
+ kzfree(smp->link_key);
+ smp->link_key = NULL;
+ return;
+ }
}
if (smp_h6(smp->tfm_cmac, smp->link_key, lebr, smp->link_key)) {
@@ -1169,10 +1197,7 @@ static void smp_allow_key_dist(struct smp_chan *smp)
static void sc_generate_ltk(struct smp_chan *smp)
{
- /* These constants are as specified in the core specification.
- * In ASCII they spell out to 'tmp2' and 'brle'.
- */
- const u8 tmp2[4] = { 0x32, 0x70, 0x6d, 0x74 };
+ /* From core spec. Spells out in ASCII as 'brle'. */
const u8 brle[4] = { 0x65, 0x6c, 0x72, 0x62 };
struct hci_conn *hcon = smp->conn->hcon;
struct hci_dev *hdev = hcon->hdev;
@@ -1187,8 +1212,19 @@ static void sc_generate_ltk(struct smp_chan *smp)
if (key->type == HCI_LK_DEBUG_COMBINATION)
set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
- if (smp_h6(smp->tfm_cmac, key->val, tmp2, smp->tk))
- return;
+ if (test_bit(SMP_FLAG_CT2, &smp->flags)) {
+ /* SALT = 0x00000000000000000000000000000000746D7032 */
+ const u8 salt[16] = { 0x32, 0x70, 0x6d, 0x74 };
+
+ if (smp_h7(smp->tfm_cmac, key->val, salt, smp->tk))
+ return;
+ } else {
+ /* From core spec. Spells out in ASCII as 'tmp2'. */
+ const u8 tmp2[4] = { 0x32, 0x70, 0x6d, 0x74 };
+
+ if (smp_h6(smp->tfm_cmac, key->val, tmp2, smp->tk))
+ return;
+ }
if (smp_h6(smp->tfm_cmac, smp->tk, brle, smp->tk))
return;
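Read in AES-CMAC terms, the two derivation functions above differ only in
what keys the MAC (matching the Bluetooth 5.0 definitions, as far as this
diff shows):

	h6(W, keyID) = AES-CMAC_W(keyID)	/* keyed by the input key W */
	h7(SALT, W)  = AES-CMAC_SALT(W)		/* keyed by the fixed salt  */

With SMP_FLAG_CT2 negotiated, link-key/LTK conversion goes through h7 with
the 'tmp1'/'tmp2' salts; otherwise both sides fall back to h6.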
@@ -1669,6 +1705,7 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
if (!rsp) {
memset(req, 0, sizeof(*req));
+ req->auth_req = SMP_AUTH_CT2;
req->init_key_dist = local_dist;
req->resp_key_dist = remote_dist;
req->max_key_size = conn->hcon->enc_key_size;
@@ -1680,6 +1717,7 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
memset(rsp, 0, sizeof(*rsp));
+ rsp->auth_req = SMP_AUTH_CT2;
rsp->max_key_size = conn->hcon->enc_key_size;
rsp->init_key_dist = req->init_key_dist & remote_dist;
rsp->resp_key_dist = req->resp_key_dist & local_dist;
@@ -1744,6 +1782,9 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
build_bredr_pairing_cmd(smp, req, &rsp);
+ if (req->auth_req & SMP_AUTH_CT2)
+ set_bit(SMP_FLAG_CT2, &smp->flags);
+
key_size = min(req->max_key_size, rsp.max_key_size);
if (check_enc_key_size(conn, key_size))
return SMP_ENC_KEY_SIZE;
@@ -1761,9 +1802,13 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
build_pairing_cmd(conn, req, &rsp, auth);
- if (rsp.auth_req & SMP_AUTH_SC)
+ if (rsp.auth_req & SMP_AUTH_SC) {
set_bit(SMP_FLAG_SC, &smp->flags);
+ if (rsp.auth_req & SMP_AUTH_CT2)
+ set_bit(SMP_FLAG_CT2, &smp->flags);
+ }
+
if (conn->hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT)
sec_level = BT_SECURITY_MEDIUM;
else
@@ -1917,6 +1962,9 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
*/
smp->remote_key_dist &= rsp->resp_key_dist;
+ if ((req->auth_req & SMP_AUTH_CT2) && (auth & SMP_AUTH_CT2))
+ set_bit(SMP_FLAG_CT2, &smp->flags);
+
/* For BR/EDR this means we're done and can start phase 3 */
if (conn->hcon->type == ACL_LINK) {
/* Clear bits which are generated but not distributed */
@@ -2312,8 +2360,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
authreq = seclevel_to_authreq(sec_level);
- if (hci_dev_test_flag(hcon->hdev, HCI_SC_ENABLED))
+ if (hci_dev_test_flag(hcon->hdev, HCI_SC_ENABLED)) {
authreq |= SMP_AUTH_SC;
+ if (hci_dev_test_flag(hcon->hdev, HCI_SSP_ENABLED))
+ authreq |= SMP_AUTH_CT2;
+ }
/* Require MITM if IO Capability allows or the security level
* requires it.
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index ffcc70b6b199..0ff6247eaa6c 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -57,6 +57,7 @@ struct smp_cmd_pairing {
#define SMP_AUTH_MITM 0x04
#define SMP_AUTH_SC 0x08
#define SMP_AUTH_KEYPRESS 0x10
+#define SMP_AUTH_CT2 0x20
#define SMP_CMD_PAIRING_CONFIRM 0x03
struct smp_cmd_pairing_confirm {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 89a687f3c0a3..ed3b3192fb00 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -19,7 +19,7 @@
#include <linux/list.h>
#include <linux/netfilter_bridge.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "br_private.h"
#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
@@ -185,7 +185,7 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- if (new_mtu < 68 || new_mtu > br_min_mtu(br))
+ if (new_mtu > br_min_mtu(br))
return -EINVAL;
dev->mtu = new_mtu;
@@ -409,7 +409,8 @@ void br_dev_setup(struct net_device *dev)
br->bridge_max_age = br->max_age = 20 * HZ;
br->bridge_hello_time = br->hello_time = 2 * HZ;
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
- br->ageing_time = BR_DEFAULT_AGEING_TIME;
+ br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
+ dev->max_mtu = ETH_MAX_MTU;
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6b43c8c88f19..e4a4176171c9 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -535,9 +535,8 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
*/
if (fdb->is_local)
return 0;
- br_warn(br, "adding interface %s with same address "
- "as a received packet\n",
- source ? source->dev->name : br->dev->name);
+ br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
+ source ? source->dev->name : br->dev->name, addr, vid);
fdb_delete(br, fdb);
}
@@ -583,9 +582,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
/* attempt to update an entry for a local interface */
if (unlikely(fdb->is_local)) {
if (net_ratelimit())
- br_warn(br, "received packet on %s with "
- "own address as source address\n",
- source->dev->name);
+ br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n",
+ source->dev->name, addr, vid);
} else {
/* fastpath: update of existing entry */
if (unlikely(source != fdb->dst)) {
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index d99b2009771a..da8157c57eb1 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -18,7 +18,7 @@
#include <linux/slab.h>
#include <linux/times.h>
#include <net/net_namespace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "br_private.h"
static int get_bridge_ifindices(struct net *net, int *indices, int num)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2136e45f5277..b30e77e8427c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/inetdevice.h>
+#include <linux/mroute.h>
#include <net/ip.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -364,13 +365,18 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
__be32 group,
u8 *igmp_type)
{
+ struct igmpv3_query *ihv3;
+ size_t igmp_hdr_size;
struct sk_buff *skb;
struct igmphdr *ih;
struct ethhdr *eth;
struct iphdr *iph;
+ igmp_hdr_size = sizeof(*ih);
+ if (br->multicast_igmp_version == 3)
+ igmp_hdr_size = sizeof(*ihv3);
skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) +
- sizeof(*ih) + 4);
+ igmp_hdr_size + 4);
if (!skb)
goto out;
@@ -395,7 +401,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
iph->version = 4;
iph->ihl = 6;
iph->tos = 0xc0;
- iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4);
+ iph->tot_len = htons(sizeof(*iph) + igmp_hdr_size + 4);
iph->id = 0;
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
@@ -411,17 +417,37 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
skb_put(skb, 24);
skb_set_transport_header(skb, skb->len);
- ih = igmp_hdr(skb);
*igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
- ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
- ih->code = (group ? br->multicast_last_member_interval :
- br->multicast_query_response_interval) /
- (HZ / IGMP_TIMER_SCALE);
- ih->group = group;
- ih->csum = 0;
- ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr));
- skb_put(skb, sizeof(*ih));
+ switch (br->multicast_igmp_version) {
+ case 2:
+ ih = igmp_hdr(skb);
+ ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
+ ih->code = (group ? br->multicast_last_member_interval :
+ br->multicast_query_response_interval) /
+ (HZ / IGMP_TIMER_SCALE);
+ ih->group = group;
+ ih->csum = 0;
+ ih->csum = ip_compute_csum((void *)ih, sizeof(*ih));
+ break;
+ case 3:
+ ihv3 = igmpv3_query_hdr(skb);
+ ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
+ ihv3->code = (group ? br->multicast_last_member_interval :
+ br->multicast_query_response_interval) /
+ (HZ / IGMP_TIMER_SCALE);
+ ihv3->group = group;
+ ihv3->qqic = br->multicast_query_interval / HZ;
+ ihv3->nsrcs = 0;
+ ihv3->resv = 0;
+ ihv3->suppress = 0;
+ ihv3->qrv = 2;
+ ihv3->csum = 0;
+ ihv3->csum = ip_compute_csum((void *)ihv3, sizeof(*ihv3));
+ break;
+ }
+
+ skb_put(skb, igmp_hdr_size);
__skb_pull(skb, sizeof(*eth));
out:
@@ -433,15 +459,20 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
const struct in6_addr *grp,
u8 *igmp_type)
{
- struct sk_buff *skb;
+ struct mld2_query *mld2q;
+ unsigned long interval;
struct ipv6hdr *ip6h;
struct mld_msg *mldq;
+ size_t mld_hdr_size;
+ struct sk_buff *skb;
struct ethhdr *eth;
u8 *hopopt;
- unsigned long interval;
+ mld_hdr_size = sizeof(*mldq);
+ if (br->multicast_mld_version == 2)
+ mld_hdr_size = sizeof(*mld2q);
skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
- 8 + sizeof(*mldq));
+ 8 + mld_hdr_size);
if (!skb)
goto out;
@@ -460,7 +491,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
ip6h = ipv6_hdr(skb);
*(__force __be32 *)ip6h = htonl(0x60000000);
- ip6h->payload_len = htons(8 + sizeof(*mldq));
+ ip6h->payload_len = htons(8 + mld_hdr_size);
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
@@ -488,26 +519,47 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
/* ICMPv6 */
skb_set_transport_header(skb, skb->len);
- mldq = (struct mld_msg *) icmp6_hdr(skb);
-
interval = ipv6_addr_any(grp) ?
br->multicast_query_response_interval :
br->multicast_last_member_interval;
-
*igmp_type = ICMPV6_MGM_QUERY;
- mldq->mld_type = ICMPV6_MGM_QUERY;
- mldq->mld_code = 0;
- mldq->mld_cksum = 0;
- mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
- mldq->mld_reserved = 0;
- mldq->mld_mca = *grp;
-
- /* checksum */
- mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
- sizeof(*mldq), IPPROTO_ICMPV6,
- csum_partial(mldq,
- sizeof(*mldq), 0));
- skb_put(skb, sizeof(*mldq));
+ switch (br->multicast_mld_version) {
+ case 1:
+ mldq = (struct mld_msg *)icmp6_hdr(skb);
+ mldq->mld_type = ICMPV6_MGM_QUERY;
+ mldq->mld_code = 0;
+ mldq->mld_cksum = 0;
+ mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
+ mldq->mld_reserved = 0;
+ mldq->mld_mca = *grp;
+ mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ sizeof(*mldq), IPPROTO_ICMPV6,
+ csum_partial(mldq,
+ sizeof(*mldq),
+ 0));
+ break;
+ case 2:
+ mld2q = (struct mld2_query *)icmp6_hdr(skb);
+ mld2q->mld2q_mrc = htons((u16)jiffies_to_msecs(interval));
+ mld2q->mld2q_type = ICMPV6_MGM_QUERY;
+ mld2q->mld2q_code = 0;
+ mld2q->mld2q_cksum = 0;
+ mld2q->mld2q_resv1 = 0;
+ mld2q->mld2q_resv2 = 0;
+ mld2q->mld2q_suppress = 0;
+ mld2q->mld2q_qrv = 2;
+ mld2q->mld2q_nsrcs = 0;
+ mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
+ mld2q->mld2q_mca = *grp;
+ mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ sizeof(*mld2q),
+ IPPROTO_ICMPV6,
+ csum_partial(mld2q,
+ sizeof(*mld2q),
+ 0));
+ break;
+ }
+ skb_put(skb, mld_hdr_size);
__skb_pull(skb, sizeof(*eth));
@@ -607,7 +659,8 @@ err:
}
struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
- struct net_bridge_port *port, struct br_ip *group)
+ struct net_bridge_port *p,
+ struct br_ip *group)
{
struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
@@ -623,7 +676,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
}
hash = br_ip_hash(mdb, group);
- mp = br_multicast_get_group(br, port, group, hash);
+ mp = br_multicast_get_group(br, p, group, hash);
switch (PTR_ERR(mp)) {
case 0:
break;
@@ -680,9 +733,9 @@ static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_port *port,
struct br_ip *group)
{
- struct net_bridge_mdb_entry *mp;
- struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+ struct net_bridge_mdb_entry *mp;
unsigned long now = jiffies;
int err;
@@ -860,9 +913,9 @@ static void br_multicast_send_query(struct net_bridge *br,
struct net_bridge_port *port,
struct bridge_mcast_own_query *own_query)
{
- unsigned long time;
- struct br_ip br_group;
struct bridge_mcast_other_query *other_query = NULL;
+ struct br_ip br_group;
+ unsigned long time;
if (!netif_running(br->dev) || br->multicast_disabled ||
!br->multicast_querier)
@@ -1638,6 +1691,21 @@ static void br_multicast_err_count(const struct net_bridge *br,
u64_stats_update_end(&pstats->syncp);
}
+static void br_multicast_pim(struct net_bridge *br,
+ struct net_bridge_port *port,
+ const struct sk_buff *skb)
+{
+ unsigned int offset = skb_transport_offset(skb);
+ struct pimhdr *pimhdr, _pimhdr;
+
+ pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr);
+ if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION ||
+ pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
+ return;
+
+ br_multicast_mark_router(br, port);
+}
+
static int br_multicast_ipv4_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
@@ -1650,8 +1718,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
err = ip_mc_check_igmp(skb, &skb_trimmed);
if (err == -ENOMSG) {
- if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr))
+ if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) {
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
+ } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
+ if (ip_hdr(skb)->protocol == IPPROTO_PIM)
+ br_multicast_pim(br, port, skb);
+ }
return 0;
} else if (err < 0) {
br_multicast_err_count(br, port, skb->protocol);
@@ -1811,7 +1883,9 @@ void br_multicast_init(struct net_bridge *br)
br->ip4_other_query.delay_time = 0;
br->ip4_querier.port = NULL;
+ br->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
+ br->multicast_mld_version = 1;
br->ip6_other_query.delay_time = 0;
br->ip6_querier.port = NULL;
#endif
@@ -2112,6 +2186,44 @@ unlock:
return err;
}
+int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
+{
+ /* Currently we support only versions 2 and 3 */
+ switch (val) {
+ case 2:
+ case 3:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ spin_lock_bh(&br->multicast_lock);
+ br->multicast_igmp_version = val;
+ spin_unlock_bh(&br->multicast_lock);
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
+{
+ /* Currently we support only versions 1 and 2 */
+ switch (val) {
+ case 1:
+ case 2:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ spin_lock_bh(&br->multicast_lock);
+ br->multicast_mld_version = val;
+ spin_unlock_bh(&br->multicast_lock);
+
+ return 0;
+}
+#endif
+
/**
* br_multicast_list_adjacent - Returns snooped multicast addresses
* @dev: The bridge port adjacent to which to retrieve addresses
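The new query versions are configurable per bridge over both netlink (IFLA_BR_MCAST_IGMP_VERSION / IFLA_BR_MCAST_MLD_VERSION) and the sysfs attributes added below in br_sysfs_br.c. A minimal user-space sketch, assuming a bridge named br0 and the usual /sys/class/net/<dev>/bridge/ layout:

#include <stdio.h>

/* Flip br0 to IGMPv3/MLDv2 queries via the new knobs; the kernel side
 * (br_multicast_set_igmp_version / br_multicast_set_mld_version) only
 * accepts 2 or 3 for IGMP and 1 or 2 for MLD, anything else is -EINVAL. */
static int set_knob(const char *attr, const char *val)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/net/br0/bridge/%s", attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	if (set_knob("multicast_igmp_version", "3"))
		return 1;
	return set_knob("multicast_mld_version", "2") ? 1 : 0;
}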
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2fe9345c1407..95087e6e8258 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -40,13 +40,13 @@
#include <net/netfilter/br_netfilter.h>
#include <net/netns/generic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
-static int brnf_net_id __read_mostly;
+static unsigned int brnf_net_id __read_mostly;
struct brnf_net {
bool enabled;
@@ -399,7 +399,7 @@ bridged_dnat:
br_nf_hook_thresh(NF_BR_PRE_ROUTING,
net, sk, skb, skb->dev,
NULL,
- br_nf_pre_routing_finish);
+ br_nf_pre_routing_finish_bridge);
return 0;
}
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -561,8 +561,8 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
}
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb,
- in, skb->dev, br_forward_finish, 1);
+ br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
+ br_forward_finish);
return 0;
}
@@ -845,8 +845,10 @@ static unsigned int ip_sabotage_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
- return NF_STOP;
+ if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) {
+ state->okfn(state->net, state->sk, skb);
+ return NF_STOLEN;
+ }
return NF_ACCEPT;
}
@@ -1006,20 +1008,20 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
struct nf_hook_state state;
int ret;
- elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
-
- while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF))
- elem = rcu_dereference(elem->next);
+ for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+ elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
+ elem = rcu_dereference(elem->next))
+ ;
if (!elem)
return okfn(net, sk, skb);
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
- nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1,
- NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
+ nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
+ sk, net, okfn);
- ret = nf_hook_slow(skb, &state);
+ ret = nf_hook_slow(skb, &state, elem);
rcu_read_unlock();
if (ret == 1)
ret = okfn(net, sk, skb);
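With NF_STOP gone, ip_sabotage_in() above shows the replacement idiom: call the continuation yourself, then return NF_STOLEN so the remaining hooks are skipped. The pattern in isolation (an illustrative hook, not actual kernel code):

#include <linux/netfilter.h>
#include <linux/skbuff.h>

/* Illustrative NF_STOP replacement: hand the skb on by invoking the
 * continuation stored in the hook state, then report it as stolen so
 * the core stops traversing the remaining hooks in the chain. */
static unsigned int skip_rest(void *priv, struct sk_buff *skb,
			      const struct nf_hook_state *state)
{
	state->okfn(state->net, state->sk, skb);	/* continue delivery */
	return NF_STOLEN;				/* end hook traversal */
}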
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 5989661c659f..96c072e71ea2 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -38,7 +38,7 @@
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e99037c6f7b7..7109b389ea58 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -781,20 +781,6 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
}
-static int br_dev_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-{
- struct net_bridge *br = netdev_priv(dev);
-
- if (tb[IFLA_ADDRESS]) {
- spin_lock_bh(&br->lock);
- br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
- spin_unlock_bh(&br->lock);
- }
-
- return register_netdevice(dev);
-}
-
static int br_port_slave_changelink(struct net_device *brdev,
struct net_device *dev,
struct nlattr *tb[],
@@ -858,6 +844,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
[IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 },
[IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 },
+ [IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
+ [IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1069,6 +1057,26 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]);
br->multicast_stats_enabled = !!mcast_stats;
}
+
+ if (data[IFLA_BR_MCAST_IGMP_VERSION]) {
+ __u8 igmp_version;
+
+ igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]);
+ err = br_multicast_set_igmp_version(br, igmp_version);
+ if (err)
+ return err;
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (data[IFLA_BR_MCAST_MLD_VERSION]) {
+ __u8 mld_version;
+
+ mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]);
+ err = br_multicast_set_mld_version(br, mld_version);
+ if (err)
+ return err;
+ }
+#endif
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (data[IFLA_BR_NF_CALL_IPTABLES]) {
@@ -1093,6 +1101,25 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
return 0;
}
+static int br_dev_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net_bridge *br = netdev_priv(dev);
+ int err;
+
+ if (tb[IFLA_ADDRESS]) {
+ spin_lock_bh(&br->lock);
+ br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
+ spin_unlock_bh(&br->lock);
+ }
+
+ err = br_changelink(dev, tb, data);
+ if (err)
+ return err;
+
+ return register_netdevice(dev);
+}
+
static size_t br_get_size(const struct net_device *brdev)
{
return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */
@@ -1135,6 +1162,8 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_IGMP_VERSION */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_MLD_VERSION */
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */
@@ -1210,9 +1239,15 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
br->multicast_last_member_count) ||
nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
- br->multicast_startup_query_count))
+ br->multicast_startup_query_count) ||
+ nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION,
+ br->multicast_igmp_version))
return -EMSGSIZE;
-
+#if IS_ENABLED(CONFIG_IPV6)
+ if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION,
+ br->multicast_mld_version))
+ return -EMSGSIZE;
+#endif
clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval,
IFLA_BR_PAD))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1b63177e0ccd..8ce621e8345c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -300,10 +300,11 @@ struct net_bridge
unsigned long max_age;
unsigned long hello_time;
unsigned long forward_delay;
- unsigned long bridge_max_age;
unsigned long ageing_time;
+ unsigned long bridge_max_age;
unsigned long bridge_hello_time;
unsigned long bridge_forward_delay;
+ unsigned long bridge_ageing_time;
u8 group_addr[ETH_ALEN];
bool group_addr_set;
@@ -333,6 +334,8 @@ struct net_bridge
u32 multicast_last_member_count;
u32 multicast_startup_query_count;
+ u8 multicast_igmp_version;
+
unsigned long multicast_last_member_interval;
unsigned long multicast_membership_interval;
unsigned long multicast_querier_interval;
@@ -353,6 +356,7 @@ struct net_bridge
struct bridge_mcast_other_query ip6_other_query;
struct bridge_mcast_own_query ip6_own_query;
struct bridge_mcast_querier ip6_querier;
+ u8 multicast_mld_version;
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif
@@ -582,6 +586,10 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
int br_multicast_toggle(struct net_bridge *br, unsigned long val);
int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
+int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
+#if IS_ENABLED(CONFIG_IPV6)
+int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
+#endif
struct net_bridge_mdb_entry *
br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst);
struct net_bridge_mdb_entry *
@@ -992,6 +1000,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
int br_set_forward_delay(struct net_bridge *br, unsigned long x);
int br_set_hello_time(struct net_bridge *br, unsigned long x);
int br_set_max_age(struct net_bridge *br, unsigned long x);
+int __set_ageing_time(struct net_device *dev, unsigned long t);
int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time);
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 2fe910c4e170..3f7543a29b76 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -61,6 +61,7 @@ void br_received_tcn_bpdu(struct net_bridge_port *p);
void br_transmit_config(struct net_bridge_port *p);
void br_transmit_tcn(struct net_bridge *br);
void br_topology_change_detection(struct net_bridge *br);
+void __br_set_topology_change(struct net_bridge *br, unsigned char val);
/* br_stp_bpdu.c */
void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 9258b8ef14ff..71fd1a4e63cc 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -234,7 +234,7 @@ static void br_record_config_timeout_values(struct net_bridge *br,
br->max_age = bpdu->max_age;
br->hello_time = bpdu->hello_time;
br->forward_delay = bpdu->forward_delay;
- br->topology_change = bpdu->topology_change;
+ __br_set_topology_change(br, bpdu->topology_change);
}
/* called under bridge lock */
@@ -344,7 +344,7 @@ void br_topology_change_detection(struct net_bridge *br)
isroot ? "propagating" : "sending tcn bpdu");
if (isroot) {
- br->topology_change = 1;
+ __br_set_topology_change(br, 1);
mod_timer(&br->topology_change_timer, jiffies
+ br->bridge_forward_delay + br->bridge_max_age);
} else if (!br->topology_change_detected) {
@@ -562,6 +562,24 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
}
+/* called under bridge lock */
+int __set_ageing_time(struct net_device *dev, unsigned long t)
+{
+ struct switchdev_attr attr = {
+ .orig_dev = dev,
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
+ .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
+ .u.ageing_time = jiffies_to_clock_t(t),
+ };
+ int err;
+
+ err = switchdev_port_attr_set(dev, &attr);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
/* Set time interval that dynamic forwarding entries live
* For pure software bridge, allow values outside the 802.1
* standard specification for special cases:
@@ -572,25 +590,52 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
*/
int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
{
- struct switchdev_attr attr = {
- .orig_dev = br->dev,
- .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
- .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
- .u.ageing_time = ageing_time,
- };
unsigned long t = clock_t_to_jiffies(ageing_time);
int err;
- err = switchdev_port_attr_set(br->dev, &attr);
- if (err && err != -EOPNOTSUPP)
+ err = __set_ageing_time(br->dev, t);
+ if (err)
return err;
+ spin_lock_bh(&br->lock);
+ br->bridge_ageing_time = t;
br->ageing_time = t;
+ spin_unlock_bh(&br->lock);
+
mod_timer(&br->gc_timer, jiffies);
return 0;
}
+/* called under bridge lock */
+void __br_set_topology_change(struct net_bridge *br, unsigned char val)
+{
+ unsigned long t;
+ int err;
+
+ if (br->stp_enabled == BR_KERNEL_STP && br->topology_change != val) {
+ /* On topology change, set the bridge ageing time to twice the
+ * forward delay. Otherwise, restore its default ageing time.
+ */
+
+ if (val) {
+ t = 2 * br->forward_delay;
+ br_debug(br, "decreasing ageing time to %lu\n", t);
+ } else {
+ t = br->bridge_ageing_time;
+ br_debug(br, "restoring ageing time to %lu\n", t);
+ }
+
+ err = __set_ageing_time(br->dev, t);
+ if (err)
+ br_warn(br, "error offloading ageing time\n");
+ else
+ br->ageing_time = t;
+ }
+
+ br->topology_change = val;
+}
+
void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
{
br->bridge_forward_delay = t;
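The effect of __br_set_topology_change() is easiest to see with the 802.1D defaults (forward_delay 15 s, ageing_time 300 s): while the flag is set, dynamic FDB entries age out in 2 * 15 = 30 s; clearing it restores the saved bridge_ageing_time. A minimal sketch of the selection, under those assumed defaults:

#include <linux/types.h>

/* Sketch of the ageing-time choice made in __br_set_topology_change();
 * with forward_delay = 15 s and bridge_ageing_time = 300 s this yields
 * 30 s during a topology change and 300 s otherwise. */
static unsigned long tc_ageing(bool topology_change,
			       unsigned long forward_delay,
			       unsigned long bridge_ageing_time)
{
	return topology_change ? 2 * forward_delay : bridge_ageing_time;
}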
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d8ad73b38de2..6c1e21411125 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -36,12 +36,6 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
/* called under bridge lock */
void br_init_port(struct net_bridge_port *p)
{
- struct switchdev_attr attr = {
- .orig_dev = p->dev,
- .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
- .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
- .u.ageing_time = jiffies_to_clock_t(p->br->ageing_time),
- };
int err;
p->port_id = br_make_port_id(p->priority, p->port_no);
@@ -50,9 +44,9 @@ void br_init_port(struct net_bridge_port *p)
p->topology_change_ack = 0;
p->config_pending = 0;
- err = switchdev_port_attr_set(p->dev, &attr);
- if (err && err != -EOPNOTSUPP)
- netdev_err(p->dev, "failed to set HW ageing time\n");
+ err = __set_ageing_time(p->dev, p->br->ageing_time);
+ if (err)
+ netdev_err(p->dev, "failed to offload ageing time\n");
}
/* NO locks held */
@@ -87,7 +81,7 @@ void br_stp_disable_bridge(struct net_bridge *br)
}
- br->topology_change = 0;
+ __br_set_topology_change(br, 0);
br->topology_change_detected = 0;
spin_unlock_bh(&br->lock);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index da058b85aa22..7ddb38e0a06e 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -125,7 +125,7 @@ static void br_topology_change_timer_expired(unsigned long arg)
br_debug(br, "topo change timer expired\n");
spin_lock(&br->lock);
br->topology_change_detected = 0;
- br->topology_change = 0;
+ __br_set_topology_change(br, 0);
spin_unlock(&br->lock);
}
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index f88c4df3f91e..a18148213b08 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -440,6 +440,23 @@ static ssize_t hash_max_store(struct device *d, struct device_attribute *attr,
}
static DEVICE_ATTR_RW(hash_max);
+static ssize_t multicast_igmp_version_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+
+ return sprintf(buf, "%u\n", br->multicast_igmp_version);
+}
+
+static ssize_t multicast_igmp_version_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, br_multicast_set_igmp_version);
+}
+static DEVICE_ATTR_RW(multicast_igmp_version);
+
static ssize_t multicast_last_member_count_show(struct device *d,
struct device_attribute *attr,
char *buf)
@@ -642,6 +659,25 @@ static ssize_t multicast_stats_enabled_store(struct device *d,
return store_bridge_parm(d, buf, len, set_stats_enabled);
}
static DEVICE_ATTR_RW(multicast_stats_enabled);
+
+#if IS_ENABLED(CONFIG_IPV6)
+static ssize_t multicast_mld_version_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+
+ return sprintf(buf, "%u\n", br->multicast_mld_version);
+}
+
+static ssize_t multicast_mld_version_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, br_multicast_set_mld_version);
+}
+static DEVICE_ATTR_RW(multicast_mld_version);
+#endif
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static ssize_t nf_call_iptables_show(
@@ -809,6 +845,10 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_multicast_query_response_interval.attr,
&dev_attr_multicast_startup_query_interval.attr,
&dev_attr_multicast_stats_enabled.attr,
+ &dev_attr_multicast_igmp_version.attr,
+#if IS_ENABLED(CONFIG_IPV6)
+ &dev_attr_multicast_mld_version.attr,
+#endif
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
&dev_attr_nf_call_iptables.attr,
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 9cebf47ac840..e7ef1a1ef3a6 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT
config NF_LOG_BRIDGE
tristate "Bridge packet logging"
+ select NF_LOG_COMMON
endif # NF_TABLES_BRIDGE
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 070cf134a22f..5929309beaa1 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -51,7 +51,8 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (diptr == NULL)
return EBT_DROP;
- arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in,
+ arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr,
+ (struct net_device *)xt_in(par),
*diptr, shp, info->mac, shp);
return info->target;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 9a11086ba6ff..e88bd4827ac1 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -179,7 +179,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_log_info *info = par->targinfo;
struct nf_loginfo li;
- struct net *net = par->net;
+ struct net *net = xt_net(par);
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = info->loglevel;
@@ -190,11 +190,12 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
* nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo
*/
if (info->bitmask & EBT_LOG_NFLOG)
- nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
- par->in, par->out, &li, "%s", info->prefix);
+ nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb,
+ xt_in(par), xt_out(par), &li, "%s",
+ info->prefix);
else
- ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in,
- par->out, &li, info->prefix);
+ ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb,
+ xt_in(par), xt_out(par), &li, info->prefix);
return EBT_CONTINUE;
}
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 54816150608e..c1dc48686200 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -23,16 +23,16 @@ static unsigned int
ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nflog_info *info = par->targinfo;
+ struct net *net = xt_net(par);
struct nf_loginfo li;
- struct net *net = par->net;
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
- nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
- par->out, &li, "%s", info->prefix);
+ nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par),
+ xt_out(par), &li, "%s", info->prefix);
return EBT_CONTINUE;
}
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 2e7c4f974340..8d2a85e0594e 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -23,12 +23,12 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (!skb_make_writable(skb, 0))
return EBT_DROP;
- if (par->hooknum != NF_BR_BROUTING)
+ if (xt_hooknum(par) != NF_BR_BROUTING)
/* rcu_read_lock()ed by nf_hook_thresh */
ether_addr_copy(eth_hdr(skb)->h_dest,
- br_port_get_rcu(par->in)->br->dev->dev_addr);
+ br_port_get_rcu(xt_in(par))->br->dev->dev_addr);
else
- ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr);
+ ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr);
skb->pkt_type = PACKET_HOST;
return info->target;
}
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index ec94c6f1ae88..8fe36dc3aab2 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb)
struct nf_hook_state state;
int ret;
- nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN,
+ nf_hook_state_init(&state, NF_BR_BROUTING,
NFPROTO_BRIDGE, skb->dev, NULL, NULL,
dev_net(skb->dev), NULL);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f5c11bbe27db..537e3d506fc2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -23,7 +23,7 @@
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/audit.h>
@@ -194,12 +194,8 @@ unsigned int ebt_do_table(struct sk_buff *skb,
const struct ebt_table_info *private;
struct xt_action_param acpar;
- acpar.family = NFPROTO_BRIDGE;
- acpar.net = state->net;
- acpar.in = state->in;
- acpar.out = state->out;
+ acpar.state = state;
acpar.hotdrop = false;
- acpar.hooknum = hook;
read_lock_bh(&table->lock);
private = table->private;
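The par->in / par->out / par->hooknum conversions in this and the surrounding ebt/nft files read those values through the nf_hook_state now embedded in xt_action_param. Simplified from the accessors in x_tables.h, they are roughly:

/* Roughly the accessors used above: xt_action_param keeps a pointer to
 * the hook state instead of copying net/in/out/hooknum into itself. */
static inline struct net *xt_net(const struct xt_action_param *par)
{
	return par->state->net;
}

static inline struct net_device *xt_in(const struct xt_action_param *par)
{
	return par->state->in;
}

static inline unsigned int xt_hooknum(const struct xt_action_param *par)
{
	return par->state->hook;
}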
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index 1663df598545..bd2b3c78f59b 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -24,21 +24,8 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf,
const struct nf_loginfo *loginfo,
const char *prefix)
{
- switch (eth_hdr(skb)->h_proto) {
- case htons(ETH_P_IP):
- nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
- loginfo, "%s", prefix);
- break;
- case htons(ETH_P_IPV6):
- nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
- loginfo, "%s", prefix);
- break;
- case htons(ETH_P_ARP):
- case htons(ETH_P_RARP):
- nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
- loginfo, "%s", prefix);
- break;
- }
+ nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum, skb,
+ in, out, loginfo, prefix);
}
static struct nf_logger nf_bridge_logger __read_mostly = {
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index ad47a921b701..5974dbc1ea24 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -23,7 +23,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
- const struct net_device *in = pkt->in, *out = pkt->out;
+ const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
u32 *dest = &regs->data[priv->dreg];
const struct net_bridge_port *p;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 4b3df6b0e3b9..206dc266ecd2 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -315,17 +315,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
case htons(ETH_P_IP):
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nft_reject_br_send_v4_unreach(pkt->net, pkt->skb,
- pkt->in, pkt->hook,
+ nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt),
priv->icmp_code);
break;
case NFT_REJECT_TCP_RST:
- nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb,
- pkt->in, pkt->hook);
+ nft_reject_br_send_v4_tcp_reset(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt));
break;
case NFT_REJECT_ICMPX_UNREACH:
- nft_reject_br_send_v4_unreach(pkt->net, pkt->skb,
- pkt->in, pkt->hook,
+ nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt),
nft_reject_icmp_code(priv->icmp_code));
break;
}
@@ -333,17 +336,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
case htons(ETH_P_IPV6):
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nft_reject_br_send_v6_unreach(pkt->net, pkt->skb,
- pkt->in, pkt->hook,
+ nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt),
priv->icmp_code);
break;
case NFT_REJECT_TCP_RST:
- nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb,
- pkt->in, pkt->hook);
+ nft_reject_br_send_v6_tcp_reset(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt));
break;
case NFT_REJECT_ICMPX_UNREACH:
- nft_reject_br_send_v6_unreach(pkt->net, pkt->skb,
- pkt->in, pkt->hook,
+ nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt),
nft_reject_icmpv6_code(priv->icmp_code));
break;
}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index d730a0f68f46..2d38b6e34203 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -52,7 +52,7 @@ struct caif_net {
struct caif_device_entry_list caifdevs;
};
-static int caif_net_id;
+static unsigned int caif_net_id;
static int q_high = 50; /* Percent */
struct cfcnfg *get_cfcnfg(struct net *net)
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index fa39fc298708..273cb07f57d8 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -390,8 +390,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
rcu_read_lock();
if (adapt_layer == NULL) {
- pr_debug("link setup response but no client exist,"
- "send linkdown back\n");
+ pr_debug("link setup response but no client exist, send linkdown back\n");
cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
goto unlock;
}
@@ -401,8 +400,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid);
if (phyinfo == NULL) {
- pr_err("ERROR: Link Layer Device disappeared"
- "while connecting\n");
+ pr_err("ERROR: Link Layer Device disappeared while connecting\n");
goto unlock;
}
@@ -436,8 +434,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
break;
default:
- pr_err("Protocol error. Link setup response "
- "- unknown channel type\n");
+ pr_err("Protocol error. Link setup response - unknown channel type\n");
goto unlock;
}
if (!servicel)
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 1108079d934f..5488e4a6ccd0 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -445,6 +445,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
* @func: callback function on filter match
* @data: returned parameter for callback function
* @ident: string for calling module identification
+ * @sk: socket pointer (might be NULL)
*
* Description:
* Invokes the callback function with the received sk_buff and the given
@@ -468,7 +469,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
*/
int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
void (*func)(struct sk_buff *, void *), void *data,
- char *ident)
+ char *ident, struct sock *sk)
{
struct receiver *r;
struct hlist_head *rl;
@@ -496,6 +497,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
r->func = func;
r->data = data;
r->ident = ident;
+ r->sk = sk;
hlist_add_head_rcu(&r->list, rl);
d->entries++;
@@ -520,8 +522,11 @@ EXPORT_SYMBOL(can_rx_register);
static void can_rx_delete_receiver(struct rcu_head *rp)
{
struct receiver *r = container_of(rp, struct receiver, rcu);
+ struct sock *sk = r->sk;
kmem_cache_free(rcv_cache, r);
+ if (sk)
+ sock_put(sk);
}
/**
@@ -596,8 +601,11 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
spin_unlock(&can_rcvlists_lock);
/* schedule the receiver item for deletion */
- if (r)
+ if (r) {
+ if (r->sk)
+ sock_hold(r->sk);
call_rcu(&r->rcu, can_rx_delete_receiver);
+ }
}
EXPORT_SYMBOL(can_rx_unregister);
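The new sk argument lets af_can pin the receiving socket across the RCU grace period: can_rx_unregister() takes a reference before call_rcu() and the callback drops it only after freeing the receiver, so a frame still in flight to r->func() cannot race with socket teardown. The pattern in isolation (a hedged sketch, not the exact kernel code):

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <net/sock.h>

/* Pin an object referenced by an RCU-protected structure until the
 * grace period ends: grab the reference before call_rcu(), release it
 * in the callback after the structure is freed. */
struct receiver_like {
	struct rcu_head rcu;
	struct sock *sk;
};

static void delete_cb(struct rcu_head *rp)
{
	struct receiver_like *r = container_of(rp, struct receiver_like, rcu);
	struct sock *sk = r->sk;

	kfree(r);
	if (sk)
		sock_put(sk);	/* drop the reference taken in unregister_receiver() */
}

static void unregister_receiver(struct receiver_like *r)
{
	if (r->sk)
		sock_hold(r->sk);	/* keep sk alive past the grace period */
	call_rcu(&r->rcu, delete_cb);
}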
diff --git a/net/can/af_can.h b/net/can/af_can.h
index fca0fe9fc45a..b86f5129e838 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -50,13 +50,14 @@
struct receiver {
struct hlist_node list;
- struct rcu_head rcu;
canid_t can_id;
canid_t mask;
unsigned long matches;
void (*func)(struct sk_buff *, void *);
void *data;
char *ident;
+ struct sock *sk;
+ struct rcu_head rcu;
};
#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 436a7537e6a9..95d13b233c65 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -199,11 +199,11 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, "%c ", (op->flags & RX_CHECK_DLC) ? 'd' : ' ');
- if (op->kt_ival1.tv64)
+ if (op->kt_ival1)
seq_printf(m, "timeo=%lld ",
(long long)ktime_to_us(op->kt_ival1));
- if (op->kt_ival2.tv64)
+ if (op->kt_ival2)
seq_printf(m, "thr=%lld ",
(long long)ktime_to_us(op->kt_ival2));
@@ -226,11 +226,11 @@ static int bcm_proc_show(struct seq_file *m, void *v)
else
seq_printf(m, "[%u] ", op->nframes);
- if (op->kt_ival1.tv64)
+ if (op->kt_ival1)
seq_printf(m, "t1=%lld ",
(long long)ktime_to_us(op->kt_ival1));
- if (op->kt_ival2.tv64)
+ if (op->kt_ival2)
seq_printf(m, "t2=%lld ",
(long long)ktime_to_us(op->kt_ival2));
@@ -365,11 +365,11 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
static void bcm_tx_start_timer(struct bcm_op *op)
{
- if (op->kt_ival1.tv64 && op->count)
+ if (op->kt_ival1 && op->count)
hrtimer_start(&op->timer,
ktime_add(ktime_get(), op->kt_ival1),
HRTIMER_MODE_ABS);
- else if (op->kt_ival2.tv64)
+ else if (op->kt_ival2)
hrtimer_start(&op->timer,
ktime_add(ktime_get(), op->kt_ival2),
HRTIMER_MODE_ABS);
@@ -380,7 +380,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
struct bcm_op *op = (struct bcm_op *)data;
struct bcm_msg_head msg_head;
- if (op->kt_ival1.tv64 && (op->count > 0)) {
+ if (op->kt_ival1 && (op->count > 0)) {
op->count--;
if (!op->count && (op->flags & TX_COUNTEVT)) {
@@ -398,7 +398,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
}
bcm_can_tx(op);
- } else if (op->kt_ival2.tv64)
+ } else if (op->kt_ival2)
bcm_can_tx(op);
bcm_tx_start_timer(op);
@@ -459,7 +459,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
lastdata->flags |= (RX_RECV|RX_THR);
/* throttling mode inactive ? */
- if (!op->kt_ival2.tv64) {
+ if (!op->kt_ival2) {
/* send RX_CHANGED to the user immediately */
bcm_rx_changed(op, lastdata);
return;
@@ -470,7 +470,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
return;
/* first reception with enabled throttling mode */
- if (!op->kt_lastmsg.tv64)
+ if (!op->kt_lastmsg)
goto rx_changed_settime;
/* got a second frame inside a potential throttle period? */
@@ -537,7 +537,7 @@ static void bcm_rx_starttimer(struct bcm_op *op)
if (op->flags & RX_NO_AUTOTIMER)
return;
- if (op->kt_ival1.tv64)
+ if (op->kt_ival1)
hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL);
}
@@ -643,7 +643,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
} else {
/* rearm throttle handling */
- op->kt_lastmsg = ktime_set(0, 0);
+ op->kt_lastmsg = 0;
return HRTIMER_NORESTART;
}
}
@@ -734,14 +734,23 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
static void bcm_remove_op(struct bcm_op *op)
{
- hrtimer_cancel(&op->timer);
- hrtimer_cancel(&op->thrtimer);
-
- if (op->tsklet.func)
- tasklet_kill(&op->tsklet);
+ if (op->tsklet.func) {
+ while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) ||
+ test_bit(TASKLET_STATE_RUN, &op->tsklet.state) ||
+ hrtimer_active(&op->timer)) {
+ hrtimer_cancel(&op->timer);
+ tasklet_kill(&op->tsklet);
+ }
+ }
- if (op->thrtsklet.func)
- tasklet_kill(&op->thrtsklet);
+ if (op->thrtsklet.func) {
+ while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) ||
+ test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) ||
+ hrtimer_active(&op->thrtimer)) {
+ hrtimer_cancel(&op->thrtimer);
+ tasklet_kill(&op->thrtsklet);
+ }
+ }
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
@@ -1005,7 +1014,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
/* disable an active timer due to zero values? */
- if (!op->kt_ival1.tv64 && !op->kt_ival2.tv64)
+ if (!op->kt_ival1 && !op->kt_ival2)
hrtimer_cancel(&op->timer);
}
@@ -1189,19 +1198,19 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
/* disable an active timer due to zero value? */
- if (!op->kt_ival1.tv64)
+ if (!op->kt_ival1)
hrtimer_cancel(&op->timer);
/*
* In any case cancel the throttle timer, flush
* potentially blocked msgs and reset throttle handling
*/
- op->kt_lastmsg = ktime_set(0, 0);
+ op->kt_lastmsg = 0;
hrtimer_cancel(&op->thrtimer);
bcm_rx_thr_flush(op, 1);
}
- if ((op->flags & STARTTIMER) && op->kt_ival1.tv64)
+ if ((op->flags & STARTTIMER) && op->kt_ival1)
hrtimer_start(&op->timer, op->kt_ival1,
HRTIMER_MODE_REL);
}
@@ -1216,7 +1225,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
err = can_rx_register(dev, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op,
- "bcm");
+ "bcm", sk);
op->rx_reg_dev = dev;
dev_put(dev);
@@ -1225,7 +1234,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
} else
err = can_rx_register(NULL, op->can_id,
REGMASK(op->can_id),
- bcm_rx_handler, op, "bcm");
+ bcm_rx_handler, op, "bcm", sk);
if (err) {
/* this bcm rx op is broken -> remove it */
list_del(&op->list);
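All the .tv64 churn above follows from ktime_t dropping its union wrapper: it is now a bare s64 of nanoseconds, so intervals can be tested and reset as plain scalars. A small sketch of the before/after:

#include <linux/ktime.h>

/* ktime_t is a plain s64 now, so the member access disappears. */
static bool interval_armed(ktime_t kt)
{
	return kt != 0;		/* was: kt.tv64 != 0 */
}

static ktime_t interval_reset(void)
{
	return 0;		/* was: ktime_set(0, 0) */
}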
diff --git a/net/can/gw.c b/net/can/gw.c
index 455168718c2e..7056a1a2bb70 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -429,7 +429,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
/* clear the skb timestamp if not configured the other way */
if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP))
- nskb->tstamp.tv64 = 0;
+ nskb->tstamp = 0;
/* send to netdevice */
if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO))
@@ -442,7 +442,7 @@ static inline int cgw_register_filter(struct cgw_job *gwj)
{
return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
gwj->ccgw.filter.can_mask, can_can_gw_rcv,
- gwj, "gw");
+ gwj, "gw", NULL);
}
static inline void cgw_unregister_filter(struct cgw_job *gwj)
diff --git a/net/can/raw.c b/net/can/raw.c
index b075f028d7e2..6dc546a06673 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -190,7 +190,7 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk,
for (i = 0; i < count; i++) {
err = can_rx_register(dev, filter[i].can_id,
filter[i].can_mask,
- raw_rcv, sk, "raw");
+ raw_rcv, sk, "raw", sk);
if (err) {
/* clean up successfully registered filters */
while (--i >= 0)
@@ -211,7 +211,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
if (err_mask)
err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
- raw_rcv, sk, "raw");
+ raw_rcv, sk, "raw", sk);
return err;
}
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index c822b3ae1bd3..48bb8d95195b 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -315,13 +315,13 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
EXPORT_SYMBOL(ceph_auth_update_authorizer);
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
- struct ceph_authorizer *a, size_t len)
+ struct ceph_authorizer *a)
{
int ret = 0;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->verify_authorizer_reply)
- ret = ac->ops->verify_authorizer_reply(ac, a, len);
+ ret = ac->ops->verify_authorizer_reply(ac, a);
mutex_unlock(&ac->mutex);
return ret;
}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index a0905f04bd13..2034fb926670 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -39,56 +39,58 @@ static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
return need != 0;
}
+static int ceph_x_encrypt_offset(void)
+{
+ return sizeof(u32) + sizeof(struct ceph_x_encrypt_header);
+}
+
static int ceph_x_encrypt_buflen(int ilen)
{
- return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
- sizeof(u32);
+ return ceph_x_encrypt_offset() + ilen + 16;
}
-static int ceph_x_encrypt(struct ceph_crypto_key *secret,
- void *ibuf, int ilen, void *obuf, size_t olen)
+static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf,
+ int buf_len, int plaintext_len)
{
- struct ceph_x_encrypt_header head = {
- .struct_v = 1,
- .magic = cpu_to_le64(CEPHX_ENC_MAGIC)
- };
- size_t len = olen - sizeof(u32);
+ struct ceph_x_encrypt_header *hdr = buf + sizeof(u32);
+ int ciphertext_len;
int ret;
- ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len,
- &head, sizeof(head), ibuf, ilen);
+ hdr->struct_v = 1;
+ hdr->magic = cpu_to_le64(CEPHX_ENC_MAGIC);
+
+ ret = ceph_crypt(secret, true, buf + sizeof(u32), buf_len - sizeof(u32),
+ plaintext_len + sizeof(struct ceph_x_encrypt_header),
+ &ciphertext_len);
if (ret)
return ret;
- ceph_encode_32(&obuf, len);
- return len + sizeof(u32);
+
+ ceph_encode_32(&buf, ciphertext_len);
+ return sizeof(u32) + ciphertext_len;
}
-static int ceph_x_decrypt(struct ceph_crypto_key *secret,
- void **p, void *end, void **obuf, size_t olen)
+static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end)
{
- struct ceph_x_encrypt_header head;
- size_t head_len = sizeof(head);
- int len, ret;
-
- len = ceph_decode_32(p);
- if (*p + len > end)
- return -EINVAL;
+ struct ceph_x_encrypt_header *hdr = *p + sizeof(u32);
+ int ciphertext_len, plaintext_len;
+ int ret;
- dout("ceph_x_decrypt len %d\n", len);
- if (*obuf == NULL) {
- *obuf = kmalloc(len, GFP_NOFS);
- if (!*obuf)
- return -ENOMEM;
- olen = len;
- }
+ ceph_decode_32_safe(p, end, ciphertext_len, e_inval);
+ ceph_decode_need(p, end, ciphertext_len, e_inval);
- ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len);
+ ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len,
+ &plaintext_len);
if (ret)
return ret;
- if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
+
+ if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC)
return -EPERM;
- *p += len;
- return olen;
+
+ *p += ciphertext_len;
+ return plaintext_len - sizeof(struct ceph_x_encrypt_header);
+
+e_inval:
+ return -EINVAL;
}
/*
@@ -143,13 +145,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
int type;
u8 tkt_struct_v, blob_struct_v;
struct ceph_x_ticket_handler *th;
- void *dbuf = NULL;
void *dp, *dend;
int dlen;
char is_enc;
struct timespec validity;
- struct ceph_crypto_key old_key;
- void *ticket_buf = NULL;
void *tp, *tpend;
void **ptp;
struct ceph_crypto_key new_session_key;
@@ -174,20 +173,17 @@ static int process_one_ticket(struct ceph_auth_client *ac,
}
/* blob for me */
- dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0);
- if (dlen <= 0) {
- ret = dlen;
+ dp = *p + ceph_x_encrypt_offset();
+ ret = ceph_x_decrypt(secret, p, end);
+ if (ret < 0)
goto out;
- }
- dout(" decrypted %d bytes\n", dlen);
- dp = dbuf;
- dend = dp + dlen;
+ dout(" decrypted %d bytes\n", ret);
+ dend = dp + ret;
tkt_struct_v = ceph_decode_8(&dp);
if (tkt_struct_v != 1)
goto bad;
- memcpy(&old_key, &th->session_key, sizeof(old_key));
ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
if (ret)
goto out;
@@ -203,15 +199,13 @@ static int process_one_ticket(struct ceph_auth_client *ac,
ceph_decode_8_safe(p, end, is_enc, bad);
if (is_enc) {
/* encrypted */
- dout(" encrypted ticket\n");
- dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0);
- if (dlen < 0) {
- ret = dlen;
+ tp = *p + ceph_x_encrypt_offset();
+ ret = ceph_x_decrypt(&th->session_key, p, end);
+ if (ret < 0)
goto out;
- }
- tp = ticket_buf;
+ dout(" encrypted ticket, decrypted %d bytes\n", ret);
ptp = &tp;
- tpend = *ptp + dlen;
+ tpend = tp + ret;
} else {
/* unencrypted */
ptp = p;
@@ -242,8 +236,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
xi->have_keys |= th->service;
out:
- kfree(ticket_buf);
- kfree(dbuf);
return ret;
bad:
@@ -294,7 +286,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
{
int maxlen;
struct ceph_x_authorize_a *msg_a;
- struct ceph_x_authorize_b msg_b;
+ struct ceph_x_authorize_b *msg_b;
void *p, *end;
int ret;
int ticket_blob_len =
@@ -308,8 +300,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
if (ret)
goto out_au;
- maxlen = sizeof(*msg_a) + sizeof(msg_b) +
- ceph_x_encrypt_buflen(ticket_blob_len);
+ maxlen = sizeof(*msg_a) + ticket_blob_len +
+ ceph_x_encrypt_buflen(sizeof(*msg_b));
dout(" need len %d\n", maxlen);
if (au->buf && au->buf->alloc_len < maxlen) {
ceph_buffer_put(au->buf);
@@ -343,18 +335,19 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
p += ticket_blob_len;
end = au->buf->vec.iov_base + au->buf->vec.iov_len;
+ msg_b = p + ceph_x_encrypt_offset();
+ msg_b->struct_v = 1;
get_random_bytes(&au->nonce, sizeof(au->nonce));
- msg_b.struct_v = 1;
- msg_b.nonce = cpu_to_le64(au->nonce);
- ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
- p, end - p);
+ msg_b->nonce = cpu_to_le64(au->nonce);
+ ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
if (ret < 0)
goto out_au;
+
p += ret;
+ WARN_ON(p > end);
au->buf->vec.iov_len = p - au->buf->vec.iov_base;
dout(" built authorizer nonce %llx len %d\n", au->nonce,
(int)au->buf->vec.iov_len);
- BUG_ON(au->buf->vec.iov_len > maxlen);
return 0;
out_au:
@@ -452,8 +445,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
if (need & CEPH_ENTITY_TYPE_AUTH) {
struct ceph_x_authenticate *auth = (void *)(head + 1);
void *p = auth + 1;
- struct ceph_x_challenge_blob tmp;
- char tmp_enc[40];
+ void *enc_buf = xi->auth_authorizer.enc_buf;
+ struct ceph_x_challenge_blob *blob = enc_buf +
+ ceph_x_encrypt_offset();
u64 *u;
if (p > end)
@@ -464,16 +458,16 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
/* encrypt and hash */
get_random_bytes(&auth->client_challenge, sizeof(u64));
- tmp.client_challenge = auth->client_challenge;
- tmp.server_challenge = cpu_to_le64(xi->server_challenge);
- ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp),
- tmp_enc, sizeof(tmp_enc));
+ blob->client_challenge = auth->client_challenge;
+ blob->server_challenge = cpu_to_le64(xi->server_challenge);
+ ret = ceph_x_encrypt(&xi->secret, enc_buf, CEPHX_AU_ENC_BUF_LEN,
+ sizeof(*blob));
if (ret < 0)
return ret;
auth->struct_v = 1;
auth->key = 0;
- for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++)
+ for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++)
auth->key ^= *(__le64 *)u;
dout(" server_challenge %llx client_challenge %llx key %llx\n",
xi->server_challenge, le64_to_cpu(auth->client_challenge),
@@ -600,8 +594,8 @@ static int ceph_x_create_authorizer(
auth->authorizer = (struct ceph_authorizer *) au;
auth->authorizer_buf = au->buf->vec.iov_base;
auth->authorizer_buf_len = au->buf->vec.iov_len;
- auth->authorizer_reply_buf = au->reply_buf;
- auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+ auth->authorizer_reply_buf = au->enc_buf;
+ auth->authorizer_reply_buf_len = CEPHX_AU_ENC_BUF_LEN;
auth->sign_message = ac->ops->sign_message;
auth->check_message_signature = ac->ops->check_message_signature;
@@ -629,27 +623,25 @@ static int ceph_x_update_authorizer(
}
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
- struct ceph_authorizer *a, size_t len)
+ struct ceph_authorizer *a)
{
struct ceph_x_authorizer *au = (void *)a;
- int ret = 0;
- struct ceph_x_authorize_reply reply;
- void *preply = &reply;
- void *p = au->reply_buf;
- void *end = p + sizeof(au->reply_buf);
+ void *p = au->enc_buf;
+ struct ceph_x_authorize_reply *reply = p + ceph_x_encrypt_offset();
+ int ret;
- ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply));
+ ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
if (ret < 0)
return ret;
- if (ret != sizeof(reply))
+ if (ret != sizeof(*reply))
return -EPERM;
- if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one))
+ if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
ret = -EPERM;
else
ret = 0;
dout("verify_authorizer_reply nonce %llx got %llx ret %d\n",
- au->nonce, le64_to_cpu(reply.nonce_plus_one), ret);
+ au->nonce, le64_to_cpu(reply->nonce_plus_one), ret);
return ret;
}
@@ -704,35 +696,48 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
}
-static int calcu_signature(struct ceph_x_authorizer *au,
- struct ceph_msg *msg, __le64 *sig)
+static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
+ __le64 *psig)
{
+ void *enc_buf = au->enc_buf;
+ struct {
+ __le32 len;
+ __le32 header_crc;
+ __le32 front_crc;
+ __le32 middle_crc;
+ __le32 data_crc;
+ } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
int ret;
- char tmp_enc[40];
- __le32 tmp[5] = {
- cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc,
- msg->footer.middle_crc, msg->footer.data_crc,
- };
- ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp),
- tmp_enc, sizeof(tmp_enc));
+
+ sigblock->len = cpu_to_le32(4*sizeof(u32));
+ sigblock->header_crc = msg->hdr.crc;
+ sigblock->front_crc = msg->footer.front_crc;
+ sigblock->middle_crc = msg->footer.middle_crc;
+ sigblock->data_crc = msg->footer.data_crc;
+ ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
+ sizeof(*sigblock));
if (ret < 0)
return ret;
- *sig = *(__le64*)(tmp_enc + 4);
+
+ *psig = *(__le64 *)(enc_buf + sizeof(u32));
return 0;
}
static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
struct ceph_msg *msg)
{
+ __le64 sig;
int ret;
if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
return 0;
- ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
- msg, &msg->footer.sig);
- if (ret < 0)
+ ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer,
+ msg, &sig);
+ if (ret)
return ret;
+
+ msg->footer.sig = sig;
msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED;
return 0;
}
@@ -746,9 +751,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
return 0;
- ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
- msg, &sig_check);
- if (ret < 0)
+ ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer,
+ msg, &sig_check);
+ if (ret)
return ret;
if (sig_check == msg->footer.sig)
return 0;
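The rewritten cephx helpers operate in place on one buffer whose layout is fixed by ceph_x_encrypt_offset(): a __le32 ciphertext length, then the encrypted region holding the ceph_x_encrypt_header, the payload and the block-size padding. As a commented sketch of that assumed layout:

/*
 * In-place cephx buffer, as assumed by ceph_x_encrypt()/ceph_x_decrypt():
 *
 *   +------------+------------------------------+---------+-----+
 *   | __le32 len | struct ceph_x_encrypt_header | payload | pad |
 *   +------------+------------------------------+---------+-----+
 *                 \_________ encrypted, 'len' bytes _________/
 *
 * Plaintext is written at buf + ceph_x_encrypt_offset(); the encrypt
 * step pads to AES_BLOCK_SIZE, fills in the leading length word and
 * returns sizeof(u32) + ciphertext_len.
 */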
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 21a5af904bae..48e9ad41bd2a 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -24,6 +24,7 @@ struct ceph_x_ticket_handler {
unsigned long renew_after, expires;
};
+#define CEPHX_AU_ENC_BUF_LEN 128 /* big enough for encrypted blob */
struct ceph_x_authorizer {
struct ceph_authorizer base;
@@ -32,7 +33,7 @@ struct ceph_x_authorizer {
unsigned int service;
u64 nonce;
u64 secret_id;
- char reply_buf[128]; /* big enough for encrypted blob */
+ char enc_buf[CEPHX_AU_ENC_BUF_LEN] __aligned(8);
};
struct ceph_x_info {
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a421e905331a..130ab407c5ec 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -17,10 +17,12 @@
# include <linux/kernel.h>
# include <linux/crush/crush.h>
# include <linux/crush/hash.h>
+# include <linux/crush/mapper.h>
#else
# include "crush_compat.h"
# include "crush.h"
# include "hash.h"
+# include "mapper.h"
#endif
#include "crush_ln_table.h"
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index db2847ac5f12..292e33bd916e 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -13,14 +13,60 @@
#include <linux/ceph/decode.h>
#include "crypto.h"
+/*
+ * Set ->key and ->tfm. The rest of the key should be filled in before
+ * this function is called.
+ */
+static int set_secret(struct ceph_crypto_key *key, void *buf)
+{
+ unsigned int noio_flag;
+ int ret;
+
+ key->key = NULL;
+ key->tfm = NULL;
+
+ switch (key->type) {
+ case CEPH_CRYPTO_NONE:
+ return 0; /* nothing to do */
+ case CEPH_CRYPTO_AES:
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
+ WARN_ON(!key->len);
+ key->key = kmemdup(buf, key->len, GFP_NOIO);
+ if (!key->key) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ /* crypto_alloc_skcipher() allocates with GFP_KERNEL */
+ noio_flag = memalloc_noio_save();
+ key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+ memalloc_noio_restore(noio_flag);
+ if (IS_ERR(key->tfm)) {
+ ret = PTR_ERR(key->tfm);
+ key->tfm = NULL;
+ goto fail;
+ }
+
+ ret = crypto_skcipher_setkey(key->tfm, key->key, key->len);
+ if (ret)
+ goto fail;
+
+ return 0;
+
+fail:
+ ceph_crypto_key_destroy(key);
+ return ret;
+}
+
int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
const struct ceph_crypto_key *src)
{
memcpy(dst, src, sizeof(struct ceph_crypto_key));
- dst->key = kmemdup(src->key, src->len, GFP_NOFS);
- if (!dst->key)
- return -ENOMEM;
- return 0;
+ return set_secret(dst, src->key);
}
int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
@@ -37,16 +83,16 @@ int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end)
{
+ int ret;
+
ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad);
key->type = ceph_decode_16(p);
ceph_decode_copy(p, &key->created, sizeof(key->created));
key->len = ceph_decode_16(p);
ceph_decode_need(p, end, key->len, bad);
- key->key = kmalloc(key->len, GFP_NOFS);
- if (!key->key)
- return -ENOMEM;
- ceph_decode_copy(p, key->key, key->len);
- return 0;
+ ret = set_secret(key, *p);
+ *p += key->len;
+ return ret;
bad:
dout("failed to decode crypto key\n");
@@ -80,9 +126,14 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
return 0;
}
-static struct crypto_skcipher *ceph_crypto_alloc_cipher(void)
+void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
{
- return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+ if (key) {
+ kfree(key->key);
+ key->key = NULL;
+ crypto_free_skcipher(key->tfm);
+ key->tfm = NULL;
+ }
}
static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
@@ -157,372 +208,82 @@ static void teardown_sgtable(struct sg_table *sgt)
sg_free_table(sgt);
}
-static int ceph_aes_encrypt(const void *key, int key_len,
- void *dst, size_t *dst_len,
- const void *src, size_t src_len)
-{
- struct scatterlist sg_in[2], prealloc_sg;
- struct sg_table sg_out;
- struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
- int ret;
- char iv[AES_BLOCK_SIZE];
- size_t zero_padding = (0x10 - (src_len & 0x0f));
- char pad[16];
-
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- memset(pad, zero_padding, zero_padding);
-
- *dst_len = src_len + zero_padding;
-
- sg_init_table(sg_in, 2);
- sg_set_buf(&sg_in[0], src, src_len);
- sg_set_buf(&sg_in[1], pad, zero_padding);
- ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len);
- if (ret)
- goto out_tfm;
-
- crypto_skcipher_setkey((void *)tfm, key, key_len);
- memcpy(iv, aes_iv, AES_BLOCK_SIZE);
-
- skcipher_request_set_tfm(req, tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
- src_len + zero_padding, iv);
-
- /*
- print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
- key, key_len, 1);
- print_hex_dump(KERN_ERR, "enc src: ", DUMP_PREFIX_NONE, 16, 1,
- src, src_len, 1);
- print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
- pad, zero_padding, 1);
- */
- ret = crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
- if (ret < 0) {
- pr_err("ceph_aes_crypt failed %d\n", ret);
- goto out_sg;
- }
- /*
- print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
- dst, *dst_len, 1);
- */
-
-out_sg:
- teardown_sgtable(&sg_out);
-out_tfm:
- crypto_free_skcipher(tfm);
- return ret;
-}
-
-static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
- size_t *dst_len,
- const void *src1, size_t src1_len,
- const void *src2, size_t src2_len)
-{
- struct scatterlist sg_in[3], prealloc_sg;
- struct sg_table sg_out;
- struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
- int ret;
- char iv[AES_BLOCK_SIZE];
- size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f));
- char pad[16];
-
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- memset(pad, zero_padding, zero_padding);
-
- *dst_len = src1_len + src2_len + zero_padding;
-
- sg_init_table(sg_in, 3);
- sg_set_buf(&sg_in[0], src1, src1_len);
- sg_set_buf(&sg_in[1], src2, src2_len);
- sg_set_buf(&sg_in[2], pad, zero_padding);
- ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len);
- if (ret)
- goto out_tfm;
-
- crypto_skcipher_setkey((void *)tfm, key, key_len);
- memcpy(iv, aes_iv, AES_BLOCK_SIZE);
-
- skcipher_request_set_tfm(req, tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
- src1_len + src2_len + zero_padding, iv);
-
- /*
- print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
- key, key_len, 1);
- print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1,
- src1, src1_len, 1);
- print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1,
- src2, src2_len, 1);
- print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
- pad, zero_padding, 1);
- */
- ret = crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
- if (ret < 0) {
- pr_err("ceph_aes_crypt2 failed %d\n", ret);
- goto out_sg;
- }
- /*
- print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
- dst, *dst_len, 1);
- */
-
-out_sg:
- teardown_sgtable(&sg_out);
-out_tfm:
- crypto_free_skcipher(tfm);
- return ret;
-}
-
-static int ceph_aes_decrypt(const void *key, int key_len,
- void *dst, size_t *dst_len,
- const void *src, size_t src_len)
+static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt,
+ void *buf, int buf_len, int in_len, int *pout_len)
{
- struct sg_table sg_in;
- struct scatterlist sg_out[2], prealloc_sg;
- struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
- char pad[16];
- char iv[AES_BLOCK_SIZE];
+ SKCIPHER_REQUEST_ON_STACK(req, key->tfm);
+ struct sg_table sgt;
+ struct scatterlist prealloc_sg;
+ char iv[AES_BLOCK_SIZE] __aligned(8);
+ int pad_byte = AES_BLOCK_SIZE - (in_len & (AES_BLOCK_SIZE - 1));
+ int crypt_len = encrypt ? in_len + pad_byte : in_len;
int ret;
- int last_byte;
-
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
- sg_init_table(sg_out, 2);
- sg_set_buf(&sg_out[0], dst, *dst_len);
- sg_set_buf(&sg_out[1], pad, sizeof(pad));
- ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len);
+ WARN_ON(crypt_len > buf_len);
+ if (encrypt)
+ memset(buf + in_len, pad_byte, pad_byte);
+ ret = setup_sgtable(&sgt, &prealloc_sg, buf, crypt_len);
if (ret)
- goto out_tfm;
+ return ret;
- crypto_skcipher_setkey((void *)tfm, key, key_len);
memcpy(iv, aes_iv, AES_BLOCK_SIZE);
-
- skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_tfm(req, key->tfm);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
- src_len, iv);
+ skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv);
/*
- print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
- key, key_len, 1);
- print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
- src, src_len, 1);
+ print_hex_dump(KERN_ERR, "key: ", DUMP_PREFIX_NONE, 16, 1,
+ key->key, key->len, 1);
+ print_hex_dump(KERN_ERR, " in: ", DUMP_PREFIX_NONE, 16, 1,
+ buf, crypt_len, 1);
*/
- ret = crypto_skcipher_decrypt(req);
- skcipher_request_zero(req);
- if (ret < 0) {
- pr_err("ceph_aes_decrypt failed %d\n", ret);
- goto out_sg;
- }
-
- if (src_len <= *dst_len)
- last_byte = ((char *)dst)[src_len - 1];
+ if (encrypt)
+ ret = crypto_skcipher_encrypt(req);
else
- last_byte = pad[src_len - *dst_len - 1];
- if (last_byte <= 16 && src_len >= last_byte) {
- *dst_len = src_len - last_byte;
- } else {
- pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
- last_byte, (int)src_len);
- return -EPERM; /* bad padding */
- }
- /*
- print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1,
- dst, *dst_len, 1);
- */
-
-out_sg:
- teardown_sgtable(&sg_in);
-out_tfm:
- crypto_free_skcipher(tfm);
- return ret;
-}
-
-static int ceph_aes_decrypt2(const void *key, int key_len,
- void *dst1, size_t *dst1_len,
- void *dst2, size_t *dst2_len,
- const void *src, size_t src_len)
-{
- struct sg_table sg_in;
- struct scatterlist sg_out[3], prealloc_sg;
- struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
- char pad[16];
- char iv[AES_BLOCK_SIZE];
- int ret;
- int last_byte;
-
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- sg_init_table(sg_out, 3);
- sg_set_buf(&sg_out[0], dst1, *dst1_len);
- sg_set_buf(&sg_out[1], dst2, *dst2_len);
- sg_set_buf(&sg_out[2], pad, sizeof(pad));
- ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len);
- if (ret)
- goto out_tfm;
-
- crypto_skcipher_setkey((void *)tfm, key, key_len);
- memcpy(iv, aes_iv, AES_BLOCK_SIZE);
-
- skcipher_request_set_tfm(req, tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
- src_len, iv);
-
- /*
- print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
- key, key_len, 1);
- print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
- src, src_len, 1);
- */
- ret = crypto_skcipher_decrypt(req);
+ ret = crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
- if (ret < 0) {
- pr_err("ceph_aes_decrypt failed %d\n", ret);
- goto out_sg;
- }
-
- if (src_len <= *dst1_len)
- last_byte = ((char *)dst1)[src_len - 1];
- else if (src_len <= *dst1_len + *dst2_len)
- last_byte = ((char *)dst2)[src_len - *dst1_len - 1];
- else
- last_byte = pad[src_len - *dst1_len - *dst2_len - 1];
- if (last_byte <= 16 && src_len >= last_byte) {
- src_len -= last_byte;
- } else {
- pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
- last_byte, (int)src_len);
- return -EPERM; /* bad padding */
- }
-
- if (src_len < *dst1_len) {
- *dst1_len = src_len;
- *dst2_len = 0;
- } else {
- *dst2_len = src_len - *dst1_len;
+ if (ret) {
+ pr_err("%s %scrypt failed: %d\n", __func__,
+ encrypt ? "en" : "de", ret);
+ goto out_sgt;
}
/*
- print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1,
- dst1, *dst1_len, 1);
- print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1,
- dst2, *dst2_len, 1);
+ print_hex_dump(KERN_ERR, "out: ", DUMP_PREFIX_NONE, 16, 1,
+ buf, crypt_len, 1);
*/
-out_sg:
- teardown_sgtable(&sg_in);
-out_tfm:
- crypto_free_skcipher(tfm);
- return ret;
-}
-
-
-int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
- const void *src, size_t src_len)
-{
- switch (secret->type) {
- case CEPH_CRYPTO_NONE:
- if (*dst_len < src_len)
- return -ERANGE;
- memcpy(dst, src, src_len);
- *dst_len = src_len;
- return 0;
-
- case CEPH_CRYPTO_AES:
- return ceph_aes_decrypt(secret->key, secret->len, dst,
- dst_len, src, src_len);
-
- default:
- return -EINVAL;
- }
-}
-
-int ceph_decrypt2(struct ceph_crypto_key *secret,
- void *dst1, size_t *dst1_len,
- void *dst2, size_t *dst2_len,
- const void *src, size_t src_len)
-{
- size_t t;
-
- switch (secret->type) {
- case CEPH_CRYPTO_NONE:
- if (*dst1_len + *dst2_len < src_len)
- return -ERANGE;
- t = min(*dst1_len, src_len);
- memcpy(dst1, src, t);
- *dst1_len = t;
- src += t;
- src_len -= t;
- if (src_len) {
- t = min(*dst2_len, src_len);
- memcpy(dst2, src, t);
- *dst2_len = t;
+ if (encrypt) {
+ *pout_len = crypt_len;
+ } else {
+ pad_byte = *(char *)(buf + in_len - 1);
+ if (pad_byte > 0 && pad_byte <= AES_BLOCK_SIZE &&
+ in_len >= pad_byte) {
+ *pout_len = in_len - pad_byte;
+ } else {
+ pr_err("%s got bad padding %d on in_len %d\n",
+ __func__, pad_byte, in_len);
+ ret = -EPERM;
+ goto out_sgt;
}
- return 0;
-
- case CEPH_CRYPTO_AES:
- return ceph_aes_decrypt2(secret->key, secret->len,
- dst1, dst1_len, dst2, dst2_len,
- src, src_len);
-
- default:
- return -EINVAL;
}
-}
-
-int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
- const void *src, size_t src_len)
-{
- switch (secret->type) {
- case CEPH_CRYPTO_NONE:
- if (*dst_len < src_len)
- return -ERANGE;
- memcpy(dst, src, src_len);
- *dst_len = src_len;
- return 0;
- case CEPH_CRYPTO_AES:
- return ceph_aes_encrypt(secret->key, secret->len, dst,
- dst_len, src, src_len);
-
- default:
- return -EINVAL;
- }
+out_sgt:
+ teardown_sgtable(&sgt);
+ return ret;
}
-int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
- const void *src1, size_t src1_len,
- const void *src2, size_t src2_len)
+int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt,
+ void *buf, int buf_len, int in_len, int *pout_len)
{
- switch (secret->type) {
+ switch (key->type) {
case CEPH_CRYPTO_NONE:
- if (*dst_len < src1_len + src2_len)
- return -ERANGE;
- memcpy(dst, src1, src1_len);
- memcpy(dst + src1_len, src2, src2_len);
- *dst_len = src1_len + src2_len;
+ *pout_len = in_len;
return 0;
-
case CEPH_CRYPTO_AES:
- return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len,
- src1, src1_len, src2, src2_len);
-
+ return ceph_aes_crypt(key, encrypt, buf, buf_len, in_len,
+ pout_len);
default:
- return -EINVAL;
+ return -ENOTSUPP;
}
}
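
The four single-purpose helpers (encrypt/encrypt2/decrypt/decrypt2) collapse into one in-place ceph_aes_crypt(): the caller supplies a single buffer with room for the worst-case pad, every pad byte stores the pad length, and decryption reads the last byte to know how much to strip. A minimal userspace sketch of that padding scheme (illustrative names, not code from the tree):

    #define AES_BLOCK_SIZE 16

    /* Encrypt side: append 1..16 pad bytes, each equal to the pad length,
     * so the total becomes a whole number of AES blocks. The caller must
     * have reserved AES_BLOCK_SIZE spare bytes in buf.
     */
    static size_t pad_in_place(unsigned char *buf, size_t in_len)
    {
            size_t pad = AES_BLOCK_SIZE - (in_len & (AES_BLOCK_SIZE - 1));
            size_t i;

            for (i = 0; i < pad; i++)
                    buf[in_len + i] = (unsigned char)pad;
            return in_len + pad;
    }

    /* Decrypt side: the last byte names the pad length; reject anything
     * outside 1..16 or longer than the input, as ceph_aes_crypt does.
     */
    static int unpad_in_place(const unsigned char *buf, size_t in_len,
                              size_t *out_len)
    {
            unsigned char pad = buf[in_len - 1];

            if (pad == 0 || pad > AES_BLOCK_SIZE || pad > in_len)
                    return -1;      /* bad padding */
            *out_len = in_len - pad;
            return 0;
    }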
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 2e9cab09f37b..58d83aa7740f 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -12,37 +12,19 @@ struct ceph_crypto_key {
struct ceph_timespec created;
int len;
void *key;
+ struct crypto_skcipher *tfm;
};
-static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
-{
- if (key) {
- kfree(key->key);
- key->key = NULL;
- }
-}
-
int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
const struct ceph_crypto_key *src);
int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end);
int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end);
int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
+void ceph_crypto_key_destroy(struct ceph_crypto_key *key);
/* crypto.c */
-int ceph_decrypt(struct ceph_crypto_key *secret,
- void *dst, size_t *dst_len,
- const void *src, size_t src_len);
-int ceph_encrypt(struct ceph_crypto_key *secret,
- void *dst, size_t *dst_len,
- const void *src, size_t src_len);
-int ceph_decrypt2(struct ceph_crypto_key *secret,
- void *dst1, size_t *dst1_len,
- void *dst2, size_t *dst2_len,
- const void *src, size_t src_len);
-int ceph_encrypt2(struct ceph_crypto_key *secret,
- void *dst, size_t *dst_len,
- const void *src1, size_t src1_len,
- const void *src2, size_t src2_len);
+int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt,
+ void *buf, int buf_len, int in_len, int *pout_len);
int ceph_crypto_init(void);
void ceph_crypto_shutdown(void);
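
With the single ceph_crypt() entry point, a caller sizes the buffer for the worst-case pad before encrypting. Roughly, under the contract above (a hypothetical caller, not code from the tree):

    int out_len, ret;
    void *buf = kmalloc(in_len + AES_BLOCK_SIZE, GFP_NOFS);

    if (!buf)
            return -ENOMEM;
    memcpy(buf, payload, in_len);
    ret = ceph_crypt(key, true, buf, in_len + AES_BLOCK_SIZE,
                     in_len, &out_len);
    /* on success, buf[0..out_len) holds the ciphertext */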
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a5502898ea33..770c52701efa 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1393,15 +1393,9 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection
return NULL;
}
- /* Can't hold the mutex while getting authorizer */
- mutex_unlock(&con->mutex);
auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
- mutex_lock(&con->mutex);
-
if (IS_ERR(auth))
return auth;
- if (con->state != CON_STATE_NEGOTIATING)
- return ERR_PTR(-EAGAIN);
con->auth_reply_buf = auth->authorizer_reply_buf;
con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
@@ -2027,6 +2021,19 @@ static int process_connect(struct ceph_connection *con)
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
+ if (con->auth_reply_buf) {
+ /*
+ * Any connection that defines ->get_authorizer()
+ * should also define ->verify_authorizer_reply().
+ * See get_connect_authorizer().
+ */
+ ret = con->ops->verify_authorizer_reply(con);
+ if (ret < 0) {
+ con->error_msg = "bad authorize reply";
+ return ret;
+ }
+ }
+
switch (con->in_reply.tag) {
case CEPH_MSGR_TAG_FEATURES:
pr_err("%s%lld %s feature set mismatch,"
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index a8effc8b7280..29a0ef351c5e 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1028,21 +1028,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
err = -ENOMEM;
monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK,
sizeof(struct ceph_mon_subscribe_ack),
- GFP_NOFS, true);
+ GFP_KERNEL, true);
if (!monc->m_subscribe_ack)
goto out_auth;
- monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 128, GFP_NOFS,
- true);
+ monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 128,
+ GFP_KERNEL, true);
if (!monc->m_subscribe)
goto out_subscribe_ack;
- monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS,
- true);
+ monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096,
+ GFP_KERNEL, true);
if (!monc->m_auth_reply)
goto out_subscribe;
- monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS, true);
+ monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_KERNEL, true);
monc->pending_auth = 0;
if (!monc->m_auth)
goto out_auth_reply;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e6ae15bc41b7..842f049abb86 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -460,7 +460,7 @@ static void request_init(struct ceph_osd_request *req)
kref_init(&req->r_kref);
init_completion(&req->r_completion);
- init_completion(&req->r_safe_completion);
+ init_completion(&req->r_done_completion);
RB_CLEAR_NODE(&req->r_node);
RB_CLEAR_NODE(&req->r_mc_node);
INIT_LIST_HEAD(&req->r_unsafe_item);
@@ -1725,7 +1725,7 @@ static void submit_request(struct ceph_osd_request *req, bool wrlocked)
__submit_request(req, wrlocked);
}
-static void __finish_request(struct ceph_osd_request *req)
+static void finish_request(struct ceph_osd_request *req)
{
struct ceph_osd_client *osdc = req->r_osdc;
struct ceph_osd *osd = req->r_osd;
@@ -1747,12 +1747,6 @@ static void __finish_request(struct ceph_osd_request *req)
ceph_msg_revoke_incoming(req->r_reply);
}
-static void finish_request(struct ceph_osd_request *req)
-{
- __finish_request(req);
- ceph_osdc_put_request(req);
-}
-
static void __complete_request(struct ceph_osd_request *req)
{
if (req->r_callback)
@@ -1770,9 +1764,9 @@ static void complete_request(struct ceph_osd_request *req, int err)
dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);
req->r_result = err;
- __finish_request(req);
+ finish_request(req);
__complete_request(req);
- complete_all(&req->r_safe_completion);
+ complete_all(&req->r_done_completion);
ceph_osdc_put_request(req);
}
@@ -1798,6 +1792,8 @@ static void cancel_request(struct ceph_osd_request *req)
cancel_map_check(req);
finish_request(req);
+ complete_all(&req->r_done_completion);
+ ceph_osdc_put_request(req);
}
static void check_pool_dne(struct ceph_osd_request *req)
@@ -2808,12 +2804,12 @@ static bool done_request(const struct ceph_osd_request *req,
* ->r_unsafe_callback is set? yes no
*
* first reply is OK (needed r_cb/r_completion, r_cb/r_completion,
- * any or needed/got safe) r_safe_completion r_safe_completion
+ * any or needed/got safe) r_done_completion r_done_completion
*
* first reply is unsafe r_unsafe_cb(true) (nothing)
*
* when we get the safe reply r_unsafe_cb(false), r_cb/r_completion,
- * r_safe_completion r_safe_completion
+ * r_done_completion r_done_completion
*/
static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
{
@@ -2915,7 +2911,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
}
if (done_request(req, &m)) {
- __finish_request(req);
+ finish_request(req);
if (req->r_linger) {
WARN_ON(req->r_unsafe_callback);
dout("req %p tid %llu cb (locked)\n", req, req->r_tid);
@@ -2934,8 +2930,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
dout("req %p tid %llu cb\n", req, req->r_tid);
__complete_request(req);
}
- if (m.flags & CEPH_OSD_FLAG_ONDISK)
- complete_all(&req->r_safe_completion);
+ complete_all(&req->r_done_completion);
ceph_osdc_put_request(req);
} else {
if (req->r_unsafe_callback) {
@@ -3471,9 +3466,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
EXPORT_SYMBOL(ceph_osdc_start_request);
/*
- * Unregister a registered request. The request is not completed (i.e.
- * no callbacks or wakeups) - higher layers are supposed to know what
- * they are canceling.
+ * Unregister a registered request. The request is not completed:
+ * ->r_result isn't set and __complete_request() isn't called.
*/
void ceph_osdc_cancel_request(struct ceph_osd_request *req)
{
@@ -3500,9 +3494,6 @@ static int wait_request_timeout(struct ceph_osd_request *req,
if (left <= 0) {
left = left ?: -ETIMEDOUT;
ceph_osdc_cancel_request(req);
-
- /* kludge - need to to wake ceph_osdc_sync() */
- complete_all(&req->r_safe_completion);
} else {
left = req->r_result; /* completed */
}
@@ -3549,7 +3540,7 @@ again:
up_read(&osdc->lock);
dout("%s waiting on req %p tid %llu last_tid %llu\n",
__func__, req, req->r_tid, last_tid);
- wait_for_completion(&req->r_safe_completion);
+ wait_for_completion(&req->r_done_completion);
ceph_osdc_put_request(req);
goto again;
}
@@ -4478,13 +4469,13 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
}
-static int verify_authorizer_reply(struct ceph_connection *con, int len)
+static int verify_authorizer_reply(struct ceph_connection *con)
{
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
- return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
+ return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer);
}
static int invalidate_authorizer(struct ceph_connection *con)
diff --git a/net/compat.c b/net/compat.c
index 1cd2ec046164..96c544b05b15 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -28,7 +28,7 @@
#include <net/sock.h>
#include <net/ip.h>
#include <net/ipv6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/compat.h>
int get_compat_msghdr(struct msghdr *kmsg,
diff --git a/net/core/Makefile b/net/core/Makefile
index d6508c2ddca5..f6761b6e3b29 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
+obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b7de71f8d5d3..662bea587165 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -36,7 +36,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
@@ -165,6 +165,7 @@ done:
* __skb_try_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
+ * @destructor: invoked under the receive lock on successful dequeue
* @peeked: returns non-zero if this packet has been seen before
* @off: an offset in bytes to peek skb from. Returns an offset
* within an skb where data actually starts
@@ -197,6 +198,8 @@ done:
* the standard around please.
*/
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
+ void (*destructor)(struct sock *sk,
+ struct sk_buff *skb),
int *peeked, int *off, int *err,
struct sk_buff **last)
{
@@ -211,6 +214,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
if (error)
goto no_packet;
+ *peeked = 0;
do {
/* Again only user level code calls this function, so nothing
* interrupt level will suddenly eat the receive_queue.
@@ -224,26 +228,28 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
spin_lock_irqsave(&queue->lock, cpu_flags);
skb_queue_walk(queue, skb) {
*last = skb;
- *peeked = skb->peeked;
if (flags & MSG_PEEK) {
if (_off >= skb->len && (skb->len || _off ||
skb->peeked)) {
_off -= skb->len;
continue;
}
-
- skb = skb_set_peeked(skb);
- error = PTR_ERR(skb);
- if (IS_ERR(skb)) {
- spin_unlock_irqrestore(&queue->lock,
- cpu_flags);
- goto no_packet;
+ if (!skb->len) {
+ skb = skb_set_peeked(skb);
+ if (IS_ERR(skb)) {
+ error = PTR_ERR(skb);
+ spin_unlock_irqrestore(&queue->lock,
+ cpu_flags);
+ goto no_packet;
+ }
}
-
+ *peeked = 1;
atomic_inc(&skb->users);
- } else
+ } else {
__skb_unlink(skb, queue);
-
+ if (destructor)
+ destructor(sk, skb);
+ }
spin_unlock_irqrestore(&queue->lock, cpu_flags);
*off = _off;
return skb;
@@ -262,6 +268,8 @@ no_packet:
EXPORT_SYMBOL(__skb_try_recv_datagram);
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
+ void (*destructor)(struct sock *sk,
+ struct sk_buff *skb),
int *peeked, int *off, int *err)
{
struct sk_buff *skb, *last;
@@ -270,8 +278,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
- skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
- &last);
+ skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
+ off, err, &last);
if (skb)
return skb;
@@ -290,7 +298,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
int peeked, off = 0;
return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
- &peeked, &off, err);
+ NULL, &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
@@ -323,6 +331,27 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
}
EXPORT_SYMBOL(__skb_free_datagram_locked);
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
+ unsigned int flags)
+{
+ int err = 0;
+
+ if (flags & MSG_PEEK) {
+ err = -ENOENT;
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ if (skb == skb_peek(&sk->sk_receive_queue)) {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ atomic_dec(&skb->users);
+ err = 0;
+ }
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ }
+
+ atomic_inc(&sk->sk_drops);
+ return err;
+}
+EXPORT_SYMBOL(__sk_queue_drop_skb);
+
/**
* skb_kill_datagram - Free a datagram skbuff forcibly
* @sk: socket
@@ -346,23 +375,10 @@ EXPORT_SYMBOL(__skb_free_datagram_locked);
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
- int err = 0;
-
- if (flags & MSG_PEEK) {
- err = -ENOENT;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
- atomic_dec(&skb->users);
- err = 0;
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- }
+ int err = __sk_queue_drop_skb(sk, skb, flags);
kfree_skb(skb);
- atomic_inc(&sk->sk_drops);
sk_mem_reclaim_partial(sk);
-
return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
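
The new destructor hook runs under the receive-queue lock at the moment the skb is unlinked, letting a protocol do per-skb accounting without re-taking the lock afterwards. Roughly how a caller might use it (names and the accounting step are hypothetical):

    static void my_skb_destructor(struct sock *sk, struct sk_buff *skb)
    {
            /* still under sk_receive_queue.lock, skb just unlinked */
            atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
    }

    skb = __skb_recv_datagram(sk, flags, my_skb_destructor,
                              &peeked, &off, &err);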
diff --git a/net/core/dev.c b/net/core/dev.c
index 6666b28b6815..7f218e095361 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -72,7 +72,7 @@
* - netif_rx() feedback
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
@@ -139,7 +139,6 @@
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
-#include <linux/sctp.h>
#include <linux/crash_dump.h>
#include "net-sysfs.h"
@@ -1732,14 +1731,14 @@ EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
- skb->tstamp.tv64 = 0;
+ skb->tstamp = 0;
if (static_key_false(&netstamp_needed))
__net_timestamp(skb);
}
#define net_timestamp_check(COND, SKB) \
if (static_key_false(&netstamp_needed)) { \
- if ((COND) && !(SKB)->tstamp.tv64) \
+ if ((COND) && !(SKB)->tstamp) \
__net_timestamp(SKB); \
} \
@@ -1944,37 +1943,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
}
}
+int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
+{
+ if (dev->num_tc) {
+ struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+ int i;
+
+ for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
+ if ((txq - tc->offset) < tc->count)
+ return i;
+ }
+
+ return -1;
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
#define xmap_dereference(P) \
rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
-static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
- int cpu, u16 index)
+static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
+ int tci, u16 index)
{
struct xps_map *map = NULL;
int pos;
if (dev_maps)
- map = xmap_dereference(dev_maps->cpu_map[cpu]);
+ map = xmap_dereference(dev_maps->cpu_map[tci]);
+ if (!map)
+ return false;
- for (pos = 0; map && pos < map->len; pos++) {
- if (map->queues[pos] == index) {
- if (map->len > 1) {
- map->queues[pos] = map->queues[--map->len];
- } else {
- RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
- kfree_rcu(map, rcu);
- map = NULL;
- }
+ for (pos = map->len; pos--;) {
+ if (map->queues[pos] != index)
+ continue;
+
+ if (map->len > 1) {
+ map->queues[pos] = map->queues[--map->len];
break;
}
+
+ RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+ kfree_rcu(map, rcu);
+ return false;
+ }
+
+ return true;
+}
+
+static bool remove_xps_queue_cpu(struct net_device *dev,
+ struct xps_dev_maps *dev_maps,
+ int cpu, u16 offset, u16 count)
+{
+ int num_tc = dev->num_tc ? : 1;
+ bool active = false;
+ int tci;
+
+ for (tci = cpu * num_tc; num_tc--; tci++) {
+ int i, j;
+
+ for (i = count, j = offset; i--; j++) {
+ if (!remove_xps_queue(dev_maps, tci, j))
+ break;
+ }
+
+ active |= i < 0;
}
- return map;
+ return active;
}
-static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
+ u16 count)
{
struct xps_dev_maps *dev_maps;
int cpu, i;
@@ -1986,21 +2028,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
if (!dev_maps)
goto out_no_maps;
- for_each_possible_cpu(cpu) {
- for (i = index; i < dev->num_tx_queues; i++) {
- if (!remove_xps_queue(dev_maps, cpu, i))
- break;
- }
- if (i == dev->num_tx_queues)
- active = true;
- }
+ for_each_possible_cpu(cpu)
+ active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
+ offset, count);
if (!active) {
RCU_INIT_POINTER(dev->xps_maps, NULL);
kfree_rcu(dev_maps, rcu);
}
- for (i = index; i < dev->num_tx_queues; i++)
+ for (i = offset + (count - 1); count--; i--)
netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
NUMA_NO_NODE);
@@ -2008,6 +2045,11 @@ out_no_maps:
mutex_unlock(&xps_map_mutex);
}
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+{
+ netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
+}
+
static struct xps_map *expand_xps_map(struct xps_map *map,
int cpu, u16 index)
{
@@ -2047,20 +2089,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
u16 index)
{
struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+ int i, cpu, tci, numa_node_id = -2;
+ int maps_sz, num_tc = 1, tc = 0;
struct xps_map *map, *new_map;
- int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
- int cpu, numa_node_id = -2;
bool active = false;
+ if (dev->num_tc) {
+ num_tc = dev->num_tc;
+ tc = netdev_txq_to_tc(dev, index);
+ if (tc < 0)
+ return -EINVAL;
+ }
+
+ maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
+ if (maps_sz < L1_CACHE_BYTES)
+ maps_sz = L1_CACHE_BYTES;
+
mutex_lock(&xps_map_mutex);
dev_maps = xmap_dereference(dev->xps_maps);
/* allocate memory for queue storage */
- for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, mask))
- continue;
-
+ for_each_cpu_and(cpu, cpu_online_mask, mask) {
if (!new_dev_maps)
new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
if (!new_dev_maps) {
@@ -2068,25 +2118,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
return -ENOMEM;
}
- map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+ tci = cpu * num_tc + tc;
+ map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
NULL;
map = expand_xps_map(map, cpu, index);
if (!map)
goto error;
- RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+ RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
}
if (!new_dev_maps)
goto out_no_new_maps;
for_each_possible_cpu(cpu) {
+ /* copy maps belonging to foreign traffic classes */
+ for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+ /* fill in the new device map from the old device map */
+ map = xmap_dereference(dev_maps->cpu_map[tci]);
+ RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+ }
+
+ /* We need to explicitly update tci as the previous loop
+ * could break out early if dev_maps is NULL.
+ */
+ tci = cpu * num_tc + tc;
+
if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
/* add queue to CPU maps */
int pos = 0;
- map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+ map = xmap_dereference(new_dev_maps->cpu_map[tci]);
while ((pos < map->len) && (map->queues[pos] != index))
pos++;
@@ -2100,26 +2163,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
#endif
} else if (dev_maps) {
/* fill in the new device map from the old device map */
- map = xmap_dereference(dev_maps->cpu_map[cpu]);
- RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+ map = xmap_dereference(dev_maps->cpu_map[tci]);
+ RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
}
+ /* copy maps belonging to foreign traffic classes */
+ for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
+ /* fill in the new device map from the old device map */
+ map = xmap_dereference(dev_maps->cpu_map[tci]);
+ RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+ }
}
rcu_assign_pointer(dev->xps_maps, new_dev_maps);
/* Cleanup old maps */
- if (dev_maps) {
- for_each_possible_cpu(cpu) {
- new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
- map = xmap_dereference(dev_maps->cpu_map[cpu]);
+ if (!dev_maps)
+ goto out_no_old_maps;
+
+ for_each_possible_cpu(cpu) {
+ for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
+ new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+ map = xmap_dereference(dev_maps->cpu_map[tci]);
if (map && map != new_map)
kfree_rcu(map, rcu);
}
-
- kfree_rcu(dev_maps, rcu);
}
+ kfree_rcu(dev_maps, rcu);
+
+out_no_old_maps:
dev_maps = new_dev_maps;
active = true;
@@ -2134,11 +2207,12 @@ out_no_new_maps:
/* removes queue from unused CPUs */
for_each_possible_cpu(cpu) {
- if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
- continue;
-
- if (remove_xps_queue(dev_maps, cpu, index))
- active = true;
+ for (i = tc, tci = cpu * num_tc; i--; tci++)
+ active |= remove_xps_queue(dev_maps, tci, index);
+ if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+ active |= remove_xps_queue(dev_maps, tci, index);
+ for (i = num_tc - tc, tci++; --i; tci++)
+ active |= remove_xps_queue(dev_maps, tci, index);
}
/* free map if not active */
@@ -2154,11 +2228,14 @@ out_no_maps:
error:
/* remove any maps that we added */
for_each_possible_cpu(cpu) {
- new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
- map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
- NULL;
- if (new_map && new_map != map)
- kfree(new_map);
+ for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
+ new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+ map = dev_maps ?
+ xmap_dereference(dev_maps->cpu_map[tci]) :
+ NULL;
+ if (new_map && new_map != map)
+ kfree(new_map);
+ }
}
mutex_unlock(&xps_map_mutex);
@@ -2169,6 +2246,44 @@ error:
EXPORT_SYMBOL(netif_set_xps_queue);
#endif
+void netdev_reset_tc(struct net_device *dev)
+{
+#ifdef CONFIG_XPS
+ netif_reset_xps_queues_gt(dev, 0);
+#endif
+ dev->num_tc = 0;
+ memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+ memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+EXPORT_SYMBOL(netdev_reset_tc);
+
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+ if (tc >= dev->num_tc)
+ return -EINVAL;
+
+#ifdef CONFIG_XPS
+ netif_reset_xps_queues(dev, offset, count);
+#endif
+ dev->tc_to_txq[tc].count = count;
+ dev->tc_to_txq[tc].offset = offset;
+ return 0;
+}
+EXPORT_SYMBOL(netdev_set_tc_queue);
+
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+ if (num_tc > TC_MAX_QUEUE)
+ return -EINVAL;
+
+#ifdef CONFIG_XPS
+ netif_reset_xps_queues_gt(dev, 0);
+#endif
+ dev->num_tc = num_tc;
+ return 0;
+}
+EXPORT_SYMBOL(netdev_set_num_tc);
+
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
* greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
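
A note on the indexing used throughout this rework: the flat xps cpu_map[] array is now addressed per (cpu, traffic class) pair, so every lookup and teardown computes the same offset. In isolation:

    /* num_tc is 1 when the device has no traffic classes, so the
     * old behaviour (tci == cpu) falls out naturally.
     */
    static inline int xps_tci(int cpu, int num_tc, int tc)
    {
            return cpu * num_tc + tc;
    }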
@@ -2487,141 +2602,6 @@ out:
}
EXPORT_SYMBOL(skb_checksum_help);
-/* skb_csum_offload_check - Driver helper function to determine if a device
- * with limited checksum offload capabilities is able to offload the checksum
- * for a given packet.
- *
- * Arguments:
- * skb - sk_buff for the packet in question
- * spec - contains the description of what device can offload
- * csum_encapped - returns true if the checksum being offloaded is
- * encpasulated. That is it is checksum for the transport header
- * in the inner headers.
- * checksum_help - when set indicates that helper function should
- * call skb_checksum_help if offload checks fail
- *
- * Returns:
- * true: Packet has passed the checksum checks and should be offloadable to
- * the device (a driver may still need to check for additional
- * restrictions of its device)
- * false: Checksum is not offloadable. If checksum_help was set then
- * skb_checksum_help was called to resolve checksum for non-GSO
- * packets and when IP protocol is not SCTP
- */
-bool __skb_csum_offload_chk(struct sk_buff *skb,
- const struct skb_csum_offl_spec *spec,
- bool *csum_encapped,
- bool csum_help)
-{
- struct iphdr *iph;
- struct ipv6hdr *ipv6;
- void *nhdr;
- int protocol;
- u8 ip_proto;
-
- if (skb->protocol == htons(ETH_P_8021Q) ||
- skb->protocol == htons(ETH_P_8021AD)) {
- if (!spec->vlan_okay)
- goto need_help;
- }
-
- /* We check whether the checksum refers to a transport layer checksum in
- * the outermost header or an encapsulated transport layer checksum that
- * corresponds to the inner headers of the skb. If the checksum is for
- * something else in the packet we need help.
- */
- if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
- /* Non-encapsulated checksum */
- protocol = eproto_to_ipproto(vlan_get_protocol(skb));
- nhdr = skb_network_header(skb);
- *csum_encapped = false;
- if (spec->no_not_encapped)
- goto need_help;
- } else if (skb->encapsulation && spec->encap_okay &&
- skb_checksum_start_offset(skb) ==
- skb_inner_transport_offset(skb)) {
- /* Encapsulated checksum */
- *csum_encapped = true;
- switch (skb->inner_protocol_type) {
- case ENCAP_TYPE_ETHER:
- protocol = eproto_to_ipproto(skb->inner_protocol);
- break;
- case ENCAP_TYPE_IPPROTO:
- protocol = skb->inner_protocol;
- break;
- }
- nhdr = skb_inner_network_header(skb);
- } else {
- goto need_help;
- }
-
- switch (protocol) {
- case IPPROTO_IP:
- if (!spec->ipv4_okay)
- goto need_help;
- iph = nhdr;
- ip_proto = iph->protocol;
- if (iph->ihl != 5 && !spec->ip_options_okay)
- goto need_help;
- break;
- case IPPROTO_IPV6:
- if (!spec->ipv6_okay)
- goto need_help;
- if (spec->no_encapped_ipv6 && *csum_encapped)
- goto need_help;
- ipv6 = nhdr;
- nhdr += sizeof(*ipv6);
- ip_proto = ipv6->nexthdr;
- break;
- default:
- goto need_help;
- }
-
-ip_proto_again:
- switch (ip_proto) {
- case IPPROTO_TCP:
- if (!spec->tcp_okay ||
- skb->csum_offset != offsetof(struct tcphdr, check))
- goto need_help;
- break;
- case IPPROTO_UDP:
- if (!spec->udp_okay ||
- skb->csum_offset != offsetof(struct udphdr, check))
- goto need_help;
- break;
- case IPPROTO_SCTP:
- if (!spec->sctp_okay ||
- skb->csum_offset != offsetof(struct sctphdr, checksum))
- goto cant_help;
- break;
- case NEXTHDR_HOP:
- case NEXTHDR_ROUTING:
- case NEXTHDR_DEST: {
- u8 *opthdr = nhdr;
-
- if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
- goto need_help;
-
- ip_proto = opthdr[0];
- nhdr += (opthdr[1] + 1) << 3;
-
- goto ip_proto_again;
- }
- default:
- goto need_help;
- }
-
- /* Passed the tests for offloading checksum */
- return true;
-
-need_help:
- if (csum_help && !skb_shinfo(skb)->gso_size)
- skb_checksum_help(skb);
-cant_help:
- return false;
-}
-EXPORT_SYMBOL(__skb_csum_offload_chk);
-
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
__be16 type = skb->protocol;
@@ -2815,9 +2795,9 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, type)) {
features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
- } else if (illegal_highdma(skb->dev, skb)) {
- features &= ~NETIF_F_SG;
}
+ if (illegal_highdma(skb->dev, skb))
+ features &= ~NETIF_F_SG;
return features;
}
@@ -3216,8 +3196,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps);
if (dev_maps) {
- map = rcu_dereference(
- dev_maps->cpu_map[skb->sender_cpu - 1]);
+ unsigned int tci = skb->sender_cpu - 1;
+
+ if (dev->num_tc) {
+ tci *= dev->num_tc;
+ tci += netdev_get_prio_tc_map(dev, skb->priority);
+ }
+
+ map = rcu_dereference(dev_maps->cpu_map[tci]);
if (map) {
if (map->len == 1)
queue_index = map->queues[0];
@@ -3461,6 +3447,8 @@ EXPORT_SYMBOL(rps_cpu_mask);
struct static_key rps_needed __read_mostly;
EXPORT_SYMBOL(rps_needed);
+struct static_key rfs_needed __read_mostly;
+EXPORT_SYMBOL(rfs_needed);
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -4453,7 +4441,9 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
pinfo->nr_frags &&
!PageHighMem(skb_frag_page(frag0))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
- NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
+ NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
+ skb_frag_size(frag0),
+ skb->end - skb->tail);
}
}
@@ -4491,7 +4481,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
- if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
+ if (skb->csum_bad)
goto normal;
gro_list_prepare(napi, skb);
@@ -4504,7 +4494,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
skb_set_network_header(skb, skb_gro_offset(skb));
skb_reset_mac_len(skb);
NAPI_GRO_CB(skb)->same_flow = 0;
- NAPI_GRO_CB(skb)->flush = 0;
+ NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->encap_mark = 0;
NAPI_GRO_CB(skb)->recursion_counter = 0;
@@ -4912,26 +4902,36 @@ void __napi_schedule_irqoff(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule_irqoff);
-void __napi_complete(struct napi_struct *n)
+bool __napi_complete(struct napi_struct *n)
{
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ /* Some drivers call us directly, instead of calling
+ * napi_complete_done().
+ */
+ if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
+ return false;
+
list_del_init(&n->poll_list);
smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
+ return true;
}
EXPORT_SYMBOL(__napi_complete);
-void napi_complete_done(struct napi_struct *n, int work_done)
+bool napi_complete_done(struct napi_struct *n, int work_done)
{
unsigned long flags;
/*
- * don't let napi dequeue from the cpu poll list
- * just in case its running on a different cpu
+ * 1) Don't let napi dequeue from the cpu poll list
+ * just in case it's running on a different cpu.
+ * 2) If we are busy polling, do nothing here, we have
+ * the guarantee we will be called later.
*/
- if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
- return;
+ if (unlikely(n->state & (NAPIF_STATE_NPSVC |
+ NAPIF_STATE_IN_BUSY_POLL)))
+ return false;
if (n->gro_list) {
unsigned long timeout = 0;
@@ -4953,6 +4953,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
__napi_complete(n);
local_irq_restore(flags);
}
+ return true;
}
EXPORT_SYMBOL(napi_complete_done);
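
Since napi_complete_done() now reports whether NAPI actually completed, a driver can skip re-arming its interrupts while busy polling still owns the context. An illustrative poll routine (the my_* helpers are hypothetical):

    static int my_poll(struct napi_struct *napi, int budget)
    {
            int work = my_clean_rx_queue(napi, budget);

            /* only re-enable IRQs if we really left polled mode */
            if (work < budget && napi_complete_done(napi, work))
                    my_enable_rx_irq(napi);
            return work;
    }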
@@ -4970,13 +4971,41 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
}
#if defined(CONFIG_NET_RX_BUSY_POLL)
+
#define BUSY_POLL_BUDGET 8
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
+{
+ int rc;
+
+ clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
+
+ local_bh_disable();
+
+ /* All we really want here is to re-enable device interrupts.
+ * Ideally, a new ndo_busy_poll_stop() could avoid another round.
+ */
+ rc = napi->poll(napi, BUSY_POLL_BUDGET);
+ netpoll_poll_unlock(have_poll_lock);
+ if (rc == BUSY_POLL_BUDGET)
+ __napi_schedule(napi);
+ local_bh_enable();
+ if (local_softirq_pending())
+ do_softirq();
+}
+
bool sk_busy_loop(struct sock *sk, int nonblock)
{
unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+ int (*napi_poll)(struct napi_struct *napi, int budget);
int (*busy_poll)(struct napi_struct *dev);
+ void *have_poll_lock = NULL;
struct napi_struct *napi;
- int rc = false;
+ int rc;
+
+restart:
+ rc = false;
+ napi_poll = NULL;
rcu_read_lock();
@@ -4987,24 +5016,33 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
/* Note: ndo_busy_poll method is optional in linux-4.5 */
busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
- do {
+ preempt_disable();
+ for (;;) {
rc = 0;
local_bh_disable();
if (busy_poll) {
rc = busy_poll(napi);
- } else if (napi_schedule_prep(napi)) {
- void *have = netpoll_poll_lock(napi);
-
- if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
- rc = napi->poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
- if (rc == BUSY_POLL_BUDGET) {
- napi_complete_done(napi, rc);
- napi_schedule(napi);
- }
- }
- netpoll_poll_unlock(have);
+ goto count;
}
+ if (!napi_poll) {
+ unsigned long val = READ_ONCE(napi->state);
+
+ /* If multiple threads are competing for this napi,
+ * we avoid dirtying napi->state as much as we can.
+ */
+ if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
+ NAPIF_STATE_IN_BUSY_POLL))
+ goto count;
+ if (cmpxchg(&napi->state, val,
+ val | NAPIF_STATE_IN_BUSY_POLL |
+ NAPIF_STATE_SCHED) != val)
+ goto count;
+ have_poll_lock = netpoll_poll_lock(napi);
+ napi_poll = napi->poll;
+ }
+ rc = napi_poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+count:
if (rc > 0)
__NET_ADD_STATS(sock_net(sk),
LINUX_MIB_BUSYPOLLRXPACKETS, rc);
@@ -5013,10 +5051,26 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
if (rc == LL_FLUSH_FAILED)
break; /* permanent failure */
- cpu_relax();
- } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
- !need_resched() && !busy_loop_timeout(end_time));
+ if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
+ busy_loop_timeout(end_time))
+ break;
+ if (unlikely(need_resched())) {
+ if (napi_poll)
+ busy_poll_stop(napi, have_poll_lock);
+ preempt_enable();
+ rcu_read_unlock();
+ cond_resched();
+ rc = !skb_queue_empty(&sk->sk_receive_queue);
+ if (rc || busy_loop_timeout(end_time))
+ return rc;
+ goto restart;
+ }
+ cpu_relax();
+ }
+ if (napi_poll)
+ busy_poll_stop(napi, have_poll_lock);
+ preempt_enable();
rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
rcu_read_unlock();
@@ -5026,7 +5080,7 @@ EXPORT_SYMBOL(sk_busy_loop);
#endif /* CONFIG_NET_RX_BUSY_POLL */
-void napi_hash_add(struct napi_struct *napi)
+static void napi_hash_add(struct napi_struct *napi)
{
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
@@ -5046,7 +5100,6 @@ void napi_hash_add(struct napi_struct *napi)
spin_unlock(&napi_hash_lock);
}
-EXPORT_SYMBOL_GPL(napi_hash_add);
/* Warning : caller is responsible to make sure rcu grace period
* is respected before freeing memory containing @napi
@@ -5094,7 +5147,6 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
list_add(&napi->dev_list, &dev->napi_list);
napi->dev = dev;
#ifdef CONFIG_NETPOLL
- spin_lock_init(&napi->poll_lock);
napi->poll_owner = -1;
#endif
set_bit(NAPI_STATE_SCHED, &napi->state);
@@ -5212,7 +5264,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
if (list_empty(&list)) {
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
- return;
+ goto out;
break;
}
@@ -5230,7 +5282,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
}
}
- __kfree_skb_flush();
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
@@ -5240,6 +5291,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
+out:
+ __kfree_skb_flush();
}
struct netdev_adjacent {
@@ -5270,6 +5323,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
return NULL;
}
+static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+{
+ struct net_device *dev = data;
+
+ return upper_dev == dev;
+}
+
/**
* netdev_has_upper_dev - Check if device is linked to an upper device
* @dev: device
@@ -5284,11 +5344,30 @@ bool netdev_has_upper_dev(struct net_device *dev,
{
ASSERT_RTNL();
- return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
+ return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+ upper_dev);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
/**
+ * netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device
+ * @dev: device
+ * @upper_dev: upper device to check
+ *
+ * Find out if a device is linked to the specified upper device and return
+ * true if so. Note that this checks the entire upper device chain.
+ * The caller must hold the RCU read lock.
+ */
+
+bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+ upper_dev);
+}
+EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
+
+/**
* netdev_has_any_upper_dev - Check if device is linked to some device
* @dev: device
*
@@ -5299,7 +5378,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev)
{
ASSERT_RTNL();
- return !list_empty(&dev->all_adj_list.upper);
+ return !list_empty(&dev->adj_list.upper);
}
/**
@@ -5326,6 +5405,20 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
+/**
+ * netdev_has_any_lower_dev - Check if device is linked to some device
+ * @dev: device
+ *
+ * Find out if a device is linked to a lower device and return true in case
+ * it is. The caller must hold the RTNL lock.
+ */
+static bool netdev_has_any_lower_dev(struct net_device *dev)
+{
+ ASSERT_RTNL();
+
+ return !list_empty(&dev->adj_list.lower);
+}
+
void *netdev_adjacent_get_private(struct list_head *adj_list)
{
struct netdev_adjacent *adj;
@@ -5362,16 +5455,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
-/**
- * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next device from the dev's upper list, starting from iter
- * position. The caller must hold RCU read lock.
- */
-struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *upper;
@@ -5379,14 +5464,41 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
- if (&upper->list == &dev->all_adj_list.upper)
+ if (&upper->list == &dev->adj_list.upper)
return NULL;
*iter = &upper->list;
return upper->dev;
}
-EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
+
+int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *udev;
+ struct list_head *iter;
+ int ret;
+
+ for (iter = &dev->adj_list.upper,
+ udev = netdev_next_upper_dev_rcu(dev, &iter);
+ udev;
+ udev = netdev_next_upper_dev_rcu(dev, &iter)) {
+ /* first is the upper device itself */
+ ret = fn(udev, data);
+ if (ret)
+ return ret;
+
+ /* then look at all of its upper devices */
+ ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
/**
* netdev_lower_get_next_private - Get the next ->private from the
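
The recursive walkers replace direct iteration over the old all_adj_list mesh: callers supply a callback that returns nonzero to stop the walk, and the walker recurses through each device's direct adjacency lists. An illustrative use (my_check_upper and the feature test are hypothetical):

    static int my_check_upper(struct net_device *upper, void *data)
    {
            netdev_features_t *want = data;

            return !(upper->features & *want);  /* nonzero stops the walk */
    }

    netdev_features_t want = NETIF_F_SG;

    /* under rcu_read_lock(): nonzero means some upper dev lacks SG */
    if (netdev_walk_all_upper_dev_rcu(dev, my_check_upper, &want))
            pr_debug("an upper device lacks NETIF_F_SG\n");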
@@ -5469,55 +5581,90 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
}
EXPORT_SYMBOL(netdev_lower_get_next);
-/**
- * netdev_all_lower_get_next - Get the next device from all lower neighbour list
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next netdev_adjacent from the dev's all lower neighbour
- * list, starting from iter position. The caller must hold RTNL lock or
- * its own locking that guarantees that the neighbour all lower
- * list will remain unchanged.
- */
-struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+static struct net_device *netdev_next_lower_dev(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
- lower = list_entry(*iter, struct netdev_adjacent, list);
+ lower = list_entry((*iter)->next, struct netdev_adjacent, list);
- if (&lower->list == &dev->all_adj_list.lower)
+ if (&lower->list == &dev->adj_list.lower)
return NULL;
- *iter = lower->list.next;
+ *iter = &lower->list;
return lower->dev;
}
-EXPORT_SYMBOL(netdev_all_lower_get_next);
-/**
- * netdev_all_lower_get_next_rcu - Get the next device from all
- * lower neighbour list, RCU variant
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next netdev_adjacent from the dev's all lower neighbour
- * list, starting from iter position. The caller must hold RCU read lock.
- */
-struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
- struct list_head **iter)
+int netdev_walk_all_lower_dev(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *ldev;
+ struct list_head *iter;
+ int ret;
+
+ for (iter = &dev->adj_list.lower,
+ ldev = netdev_next_lower_dev(dev, &iter);
+ ldev;
+ ldev = netdev_next_lower_dev(dev, &iter)) {
+ /* first is the lower device itself */
+ ret = fn(ldev, data);
+ if (ret)
+ return ret;
+
+ /* then look at all of its lower devices */
+ ret = netdev_walk_all_lower_dev(ldev, fn, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
+
+static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
-
- if (&lower->list == &dev->all_adj_list.lower)
+ if (&lower->list == &dev->adj_list.lower)
return NULL;
*iter = &lower->list;
return lower->dev;
}
-EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *ldev;
+ struct list_head *iter;
+ int ret;
+
+ for (iter = &dev->adj_list.lower,
+ ldev = netdev_next_lower_dev_rcu(dev, &iter);
+ ldev;
+ ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
+ /* first is the lower device itself */
+ ret = fn(ldev, data);
+ if (ret)
+ return ret;
+
+ /* then look at all of its lower devices */
+ ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
@@ -5590,7 +5737,6 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct net_device *adj_dev,
- u16 ref_nr,
struct list_head *dev_list,
void *private, bool master)
{
@@ -5600,7 +5746,10 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj = __netdev_find_adj(adj_dev, dev_list);
if (adj) {
- adj->ref_nr += ref_nr;
+ adj->ref_nr += 1;
+ pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
+ dev->name, adj_dev->name, adj->ref_nr);
+
return 0;
}
@@ -5610,12 +5759,12 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->dev = adj_dev;
adj->master = master;
- adj->ref_nr = ref_nr;
+ adj->ref_nr = 1;
adj->private = private;
dev_hold(adj_dev);
- pr_debug("dev_hold for %s, because of link added from %s to %s\n",
- adj_dev->name, dev->name, adj_dev->name);
+ pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
+ dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
@@ -5654,17 +5803,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
{
struct netdev_adjacent *adj;
+ pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
+ dev->name, adj_dev->name, ref_nr);
+
adj = __netdev_find_adj(adj_dev, dev_list);
if (!adj) {
- pr_err("tried to remove device %s from %s\n",
+ pr_err("Adjacency does not exist for device %s from %s\n",
dev->name, adj_dev->name);
- BUG();
+ WARN_ON(1);
+ return;
}
if (adj->ref_nr > ref_nr) {
- pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name,
- ref_nr, adj->ref_nr-ref_nr);
+ pr_debug("adjacency: %s to %s ref_nr - %d = %d\n",
+ dev->name, adj_dev->name, ref_nr,
+ adj->ref_nr - ref_nr);
adj->ref_nr -= ref_nr;
return;
}
@@ -5676,7 +5830,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
list_del_rcu(&adj->list);
- pr_debug("dev_put for %s, because link removed from %s to %s\n",
+ pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
adj_dev->name, dev->name, adj_dev->name);
dev_put(adj_dev);
kfree_rcu(adj, rcu);
@@ -5684,38 +5838,27 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
struct net_device *upper_dev,
- u16 ref_nr,
struct list_head *up_list,
struct list_head *down_list,
void *private, bool master)
{
int ret;
- ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list,
+ ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,
private, master);
if (ret)
return ret;
- ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list,
+ ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,
private, false);
if (ret) {
- __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
+ __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);
return ret;
}
return 0;
}
-static int __netdev_adjacent_dev_link(struct net_device *dev,
- struct net_device *upper_dev,
- u16 ref_nr)
-{
- return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr,
- &dev->all_adj_list.upper,
- &upper_dev->all_adj_list.lower,
- NULL, false);
-}
-
static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
struct net_device *upper_dev,
u16 ref_nr,
@@ -5726,40 +5869,19 @@ static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
__netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
}
-static void __netdev_adjacent_dev_unlink(struct net_device *dev,
- struct net_device *upper_dev,
- u16 ref_nr)
-{
- __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
- &dev->all_adj_list.upper,
- &upper_dev->all_adj_list.lower);
-}
-
static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
struct net_device *upper_dev,
void *private, bool master)
{
- int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1);
-
- if (ret)
- return ret;
-
- ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1,
- &dev->adj_list.upper,
- &upper_dev->adj_list.lower,
- private, master);
- if (ret) {
- __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
- return ret;
- }
-
- return 0;
+ return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ &dev->adj_list.upper,
+ &upper_dev->adj_list.lower,
+ private, master);
}
static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
struct net_device *upper_dev)
{
- __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
__netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
&dev->adj_list.upper,
&upper_dev->adj_list.lower);
@@ -5770,7 +5892,6 @@ static int __netdev_upper_dev_link(struct net_device *dev,
void *upper_priv, void *upper_info)
{
struct netdev_notifier_changeupper_info changeupper_info;
- struct netdev_adjacent *i, *j, *to_i, *to_j;
int ret = 0;
ASSERT_RTNL();
@@ -5779,10 +5900,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
+ if (netdev_has_upper_dev(upper_dev, dev))
return -EBUSY;
- if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
+ if (netdev_has_upper_dev(dev, upper_dev))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
@@ -5804,80 +5925,15 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- /* Now that we linked these devs, make all the upper_dev's
- * all_adj_list.upper visible to every dev's all_adj_list.lower an
- * versa, and don't forget the devices itself. All of these
- * links are non-neighbours.
- */
- list_for_each_entry(i, &dev->all_adj_list.lower, list) {
- list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
- pr_debug("Interlinking %s with %s, non-neighbour\n",
- i->dev->name, j->dev->name);
- ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
- if (ret)
- goto rollback_mesh;
- }
- }
-
- /* add dev to every upper_dev's upper device */
- list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
- pr_debug("linking %s's upper device %s with %s\n",
- upper_dev->name, i->dev->name, dev->name);
- ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
- if (ret)
- goto rollback_upper_mesh;
- }
-
- /* add upper_dev to every dev's lower device */
- list_for_each_entry(i, &dev->all_adj_list.lower, list) {
- pr_debug("linking %s's lower device %s with %s\n", dev->name,
- i->dev->name, upper_dev->name);
- ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
- if (ret)
- goto rollback_lower_mesh;
- }
-
ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
- goto rollback_lower_mesh;
+ goto rollback;
return 0;
-rollback_lower_mesh:
- to_i = i;
- list_for_each_entry(i, &dev->all_adj_list.lower, list) {
- if (i == to_i)
- break;
- __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
- }
-
- i = NULL;
-
-rollback_upper_mesh:
- to_i = i;
- list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
- if (i == to_i)
- break;
- __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
- }
-
- i = j = NULL;
-
-rollback_mesh:
- to_i = i;
- to_j = j;
- list_for_each_entry(i, &dev->all_adj_list.lower, list) {
- list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
- if (i == to_i && j == to_j)
- break;
- __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
- }
- if (i == to_i)
- break;
- }
-
+rollback:
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
return ret;
@@ -5934,7 +5990,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
struct netdev_notifier_changeupper_info changeupper_info;
- struct netdev_adjacent *i, *j;
ASSERT_RTNL();
changeupper_info.upper_dev = upper_dev;
@@ -5946,23 +6001,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- /* Here is the tricky part. We must remove all dev's lower
- * devices from all upper_dev's upper devices and vice
- * versa, to maintain the graph relationship.
- */
- list_for_each_entry(i, &dev->all_adj_list.lower, list)
- list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
-
- /* remove also the devices itself from lower/upper device
- * list
- */
- list_for_each_entry(i, &dev->all_adj_list.lower, list)
- __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
-
- list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
-
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
&changeupper_info.info);
}
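
With the all_adj_list mesh gone, loop prevention rests on netdev_has_upper_dev() walking the remaining adj_list neighbours transitively. A hedged sketch of that idea (illustrative only; the kernel's actual walker in this series is iterative and RCU-aware, not this naive recursion):

static bool sketch_has_upper_dev(struct net_device *dev,
				 struct net_device *upper)
{
	struct netdev_adjacent *adj;

	/* adj_list.upper holds only direct neighbours now, so the
	 * whole upper graph is reached by recursing through them.
	 * Caller is assumed to hold RTNL.
	 */
	list_for_each_entry(adj, &dev->adj_list.upper, list) {
		if (adj->dev == upper ||
		    sketch_has_upper_dev(adj->dev, upper))
			return true;
	}
	return false;
}
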
@@ -6500,9 +6538,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
if (new_mtu == dev->mtu)
return 0;
- /* MTU must be positive. */
- if (new_mtu < 0)
+ /* MTU must be positive, and in range */
+ if (new_mtu < 0 || new_mtu < dev->min_mtu) {
+ net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
+ dev->name, new_mtu, dev->min_mtu);
return -EINVAL;
+ }
+
+ if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
+ net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
+ dev->name, new_mtu, dev->max_mtu);
+ return -EINVAL;
+ }
if (!netif_device_present(dev))
return -ENODEV;
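
Drivers opt in to this core-level validation simply by publishing their bounds; a hedged sketch of a driver setup function (min_mtu/max_mtu are the fields introduced by this series; the constants chosen here are illustrative):

static void example_ndev_setup(struct net_device *dev)
{
	ether_setup(dev);

	/* dev_set_mtu() now rejects values outside [min_mtu, max_mtu];
	 * max_mtu == 0 leaves the upper bound unenforced by the core.
	 */
	dev->min_mtu = ETH_MIN_MTU;	/* 68 */
	dev->max_mtu = ETH_DATA_LEN;	/* 1500, or the hw jumbo limit */
}
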
@@ -6649,26 +6696,42 @@ EXPORT_SYMBOL(dev_change_proto_down);
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
* @fd: new program fd or negative value to clear
+ * @flags: xdp-related flags
*
* Set or clear a bpf program for a device
*/
-int dev_change_xdp_fd(struct net_device *dev, int fd)
+int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- struct netdev_xdp xdp = {};
+ struct netdev_xdp xdp;
int err;
+ ASSERT_RTNL();
+
if (!ops->ndo_xdp)
return -EOPNOTSUPP;
if (fd >= 0) {
+ if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
+ memset(&xdp, 0, sizeof(xdp));
+ xdp.command = XDP_QUERY_PROG;
+
+ err = ops->ndo_xdp(dev, &xdp);
+ if (err < 0)
+ return err;
+ if (xdp.prog_attached)
+ return -EBUSY;
+ }
+
prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
+ memset(&xdp, 0, sizeof(xdp));
xdp.command = XDP_SETUP_PROG;
xdp.prog = prog;
+
err = ops->ndo_xdp(dev, &xdp);
if (err < 0 && prog)
bpf_prog_put(prog);
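
Both commands arrive at the driver through the same ndo_xdp hook; a hedged sketch of a handler that satisfies the XDP_QUERY_PROG probe added above (example_priv and its xdp_prog field are hypothetical):

static int example_ndo_xdp(struct net_device *dev, struct netdev_xdp *xdp)
{
	struct example_priv *priv = netdev_priv(dev);	/* hypothetical */

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		/* take over the new reference; release the old program,
		 * the core only puts the new one if we return an error.
		 */
		if (priv->xdp_prog)
			bpf_prog_put(priv->xdp_prog);
		priv->xdp_prog = xdp->prog;
		return 0;
	case XDP_QUERY_PROG:
		xdp->prog_attached = !!priv->xdp_prog;
		return 0;
	default:
		return -EINVAL;
	}
}
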
@@ -6777,6 +6840,7 @@ static void rollback_registered_many(struct list_head *head)
/* Notifier chain MUST have detached all our upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
+ WARN_ON(netdev_has_any_lower_dev(dev));
/* Remove entries from kobject tree */
netdev_unregister_kobject(dev);
@@ -7655,8 +7719,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->link_watch_list);
INIT_LIST_HEAD(&dev->adj_list.upper);
INIT_LIST_HEAD(&dev->adj_list.lower);
- INIT_LIST_HEAD(&dev->all_adj_list.upper);
- INIT_LIST_HEAD(&dev->all_adj_list.lower);
INIT_LIST_HEAD(&dev->ptype_all);
INIT_LIST_HEAD(&dev->ptype_specific);
#ifdef CONFIG_NET_SCHED
@@ -7667,7 +7729,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->tx_queue_len) {
dev->priv_flags |= IFF_NO_QUEUE;
- dev->tx_queue_len = 1;
+ dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
}
dev->num_tx_queues = txqs;
@@ -7948,18 +8010,13 @@ out:
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);
-static int dev_cpu_callback(struct notifier_block *nfb,
- unsigned long action,
- void *ocpu)
+static int dev_cpu_dead(unsigned int oldcpu)
{
struct sk_buff **list_skb;
struct sk_buff *skb;
- unsigned int cpu, oldcpu = (unsigned long)ocpu;
+ unsigned int cpu;
struct softnet_data *sd, *oldsd;
- if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
- return NOTIFY_OK;
-
local_irq_disable();
cpu = smp_processor_id();
sd = &per_cpu(softnet_data, cpu);
@@ -8009,10 +8066,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,
input_queue_head_incr(oldsd);
}
- return NOTIFY_OK;
+ return 0;
}
-
/**
* netdev_increment_features - increment feature set by one
* @all: current feature set
@@ -8346,7 +8402,9 @@ static int __init net_dev_init(void)
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
- hotcpu_notifier(dev_cpu_callback, 0);
+ rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
+ NULL, dev_cpu_dead);
+ WARN_ON(rc < 0);
dst_subsys_init();
rc = 0;
out:
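
The conversion above is the standard teardown-only hotplug-state pattern; a hedged minimal sketch of it in isolation (state constant reused from the diff for illustration):

#include <linux/cpuhotplug.h>

static int example_cpu_dead(unsigned int cpu)
{
	/* runs on a surviving CPU after @cpu went down; migrate
	 * per-cpu queues and backlog away from @cpu here.
	 */
	return 0;
}

static int __init example_init(void)
{
	/* NULL startup callback: nothing to do when a CPU comes up.
	 * The _nocalls variant also skips invoking callbacks for
	 * CPUs that are already online at registration time.
	 */
	return cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD,
					 "net/dev:dead", NULL,
					 example_cpu_dead);
}
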
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 1b5063088f1a..2b5bf9efa720 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -341,15 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
mutex_unlock(&devlink_mutex);
}
-static struct genl_family devlink_nl_family = {
- .id = GENL_ID_GENERATE,
- .name = DEVLINK_GENL_NAME,
- .version = DEVLINK_GENL_VERSION,
- .maxattr = DEVLINK_ATTR_MAX,
- .netnsok = true,
- .pre_doit = devlink_nl_pre_doit,
- .post_doit = devlink_nl_post_doit,
-};
+static struct genl_family devlink_nl_family;
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
@@ -608,6 +600,8 @@ static int devlink_port_type_set(struct devlink *devlink,
if (devlink->ops && devlink->ops->port_type_set) {
if (port_type == DEVLINK_PORT_TYPE_NOTSET)
return -EINVAL;
+ if (port_type == devlink_port->type)
+ return 0;
err = devlink->ops->port_type_set(devlink_port, port_type);
if (err)
return err;
@@ -1400,26 +1394,45 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
- u32 seq, int flags, u16 mode)
+ u32 seq, int flags)
{
+ const struct devlink_ops *ops = devlink->ops;
void *hdr;
+ int err = 0;
+ u16 mode;
+ u8 inline_mode;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
if (!hdr)
return -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto out;
- if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode))
- goto nla_put_failure;
+ err = ops->eswitch_mode_get(devlink, &mode);
+ if (err)
+ goto out;
+ err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
+ if (err)
+ goto out;
+
+ if (ops->eswitch_inline_mode_get) {
+ err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
+ if (err)
+ goto out;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
+ inline_mode);
+ if (err)
+ goto out;
+ }
genlmsg_end(msg, hdr);
return 0;
-nla_put_failure:
+out:
genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
+ return err;
}
static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
@@ -1428,22 +1441,17 @@ static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
struct sk_buff *msg;
- u16 mode;
int err;
if (!ops || !ops->eswitch_mode_get)
return -EOPNOTSUPP;
- err = ops->eswitch_mode_get(devlink, &mode);
- if (err)
- return err;
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
- info->snd_portid, info->snd_seq, 0, mode);
+ info->snd_portid, info->snd_seq, 0);
if (err) {
nlmsg_free(msg);
@@ -1459,15 +1467,32 @@ static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
u16 mode;
+ u8 inline_mode;
+ int err = 0;
- if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE])
- return -EINVAL;
+ if (!ops)
+ return -EOPNOTSUPP;
- mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
+ if (!ops->eswitch_mode_set)
+ return -EOPNOTSUPP;
+ mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+ err = ops->eswitch_mode_set(devlink, mode);
+ if (err)
+ return err;
+ }
- if (ops && ops->eswitch_mode_set)
- return ops->eswitch_mode_set(devlink, mode);
- return -EOPNOTSUPP;
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
+ if (!ops->eswitch_inline_mode_set)
+ return -EOPNOTSUPP;
+ inline_mode = nla_get_u8(
+ info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
+ err = ops->eswitch_inline_mode_set(devlink, inline_mode);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1484,6 +1509,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -1618,6 +1644,20 @@ static const struct genl_ops devlink_nl_ops[] = {
},
};
+static struct genl_family devlink_nl_family __ro_after_init = {
+ .name = DEVLINK_GENL_NAME,
+ .version = DEVLINK_GENL_VERSION,
+ .maxattr = DEVLINK_ATTR_MAX,
+ .netnsok = true,
+ .pre_doit = devlink_nl_pre_doit,
+ .post_doit = devlink_nl_post_doit,
+ .module = THIS_MODULE,
+ .ops = devlink_nl_ops,
+ .n_ops = ARRAY_SIZE(devlink_nl_ops),
+ .mcgrps = devlink_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
+};
+
/**
* devlink_alloc - Allocate new devlink instance resources
*
@@ -1840,9 +1880,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
static int __init devlink_module_init(void)
{
- return genl_register_family_with_ops_groups(&devlink_nl_family,
- devlink_nl_ops,
- devlink_nl_mcgrps);
+ return genl_register_family(&devlink_nl_family);
}
static void __exit devlink_module_exit(void)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 72cfb0c61125..fb55327dcfea 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -59,12 +59,7 @@ struct dm_hw_stat_delta {
unsigned long last_drop_val;
};
-static struct genl_family net_drop_monitor_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = "NET_DM",
- .version = 2,
-};
+static struct genl_family net_drop_monitor_family;
static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
@@ -80,6 +75,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
struct nlattr *nla;
struct sk_buff *skb;
unsigned long flags;
+ void *msg_header;
al = sizeof(struct net_dm_alert_msg);
al += dm_hit_limit * sizeof(struct net_dm_drop_point);
@@ -87,21 +83,41 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
skb = genlmsg_new(al, GFP_KERNEL);
- if (skb) {
- genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
- 0, NET_DM_CMD_ALERT);
- nla = nla_reserve(skb, NLA_UNSPEC,
- sizeof(struct net_dm_alert_msg));
- msg = nla_data(nla);
- memset(msg, 0, al);
- } else {
- mod_timer(&data->send_timer, jiffies + HZ / 10);
+ if (!skb)
+ goto err;
+
+ msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
+ 0, NET_DM_CMD_ALERT);
+ if (!msg_header) {
+ nlmsg_free(skb);
+ skb = NULL;
+ goto err;
+ }
+ nla = nla_reserve(skb, NLA_UNSPEC,
+ sizeof(struct net_dm_alert_msg));
+ if (!nla) {
+ nlmsg_free(skb);
+ skb = NULL;
+ goto err;
}
+ msg = nla_data(nla);
+ memset(msg, 0, al);
+ goto out;
+err:
+ mod_timer(&data->send_timer, jiffies + HZ / 10);
+out:
spin_lock_irqsave(&data->lock, flags);
swap(data->skb, skb);
spin_unlock_irqrestore(&data->lock, flags);
+ if (skb) {
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+ struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh);
+
+ genlmsg_end(skb, genlmsg_data(gnlh));
+ }
+
return skb;
}
@@ -351,6 +367,17 @@ static const struct genl_ops dropmon_ops[] = {
},
};
+static struct genl_family net_drop_monitor_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = "NET_DM",
+ .version = 2,
+ .module = THIS_MODULE,
+ .ops = dropmon_ops,
+ .n_ops = ARRAY_SIZE(dropmon_ops),
+ .mcgrps = dropmon_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps),
+};
+
static struct notifier_block dropmon_net_notifier = {
.notifier_call = dropmon_net_event
};
@@ -367,8 +394,7 @@ static int __init init_net_drop_monitor(void)
return -ENOSPC;
}
- rc = genl_register_family_with_ops_groups(&net_drop_monitor_family,
- dropmon_ops, dropmon_mcgrps);
+ rc = genl_register_family(&net_drop_monitor_family);
if (rc) {
pr_err("Could not create drop monitor netlink family\n");
return rc;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 047a1752ece1..236a21e3c878 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -119,6 +119,12 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
[ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
};
+static const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
+};
+
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gfeatures cmd = {
@@ -227,6 +233,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
if (sset == ETH_SS_TUNABLES)
return ARRAY_SIZE(tunable_strings);
+ if (sset == ETH_SS_PHY_TUNABLES)
+ return ARRAY_SIZE(phy_tunable_strings);
+
if (sset == ETH_SS_PHY_STATS) {
if (dev->phydev)
return phy_get_sset_count(dev->phydev);
@@ -253,6 +262,8 @@ static void __ethtool_get_strings(struct net_device *dev,
sizeof(rss_hash_func_strings));
else if (stringset == ETH_SS_TUNABLES)
memcpy(data, tunable_strings, sizeof(tunable_strings));
+ else if (stringset == ETH_SS_PHY_TUNABLES)
+ memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
else if (stringset == ETH_SS_PHY_STATS) {
struct phy_device *phydev = dev->phydev;
@@ -1701,7 +1712,7 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_channels channels, max;
+ struct ethtool_channels channels, max = { .cmd = ETHTOOL_GCHANNELS };
u32 max_rx_in_use = 0;
if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
@@ -2422,6 +2433,85 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
};
}
+static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
+{
+ switch (tuna->id) {
+ case ETHTOOL_PHY_DOWNSHIFT:
+ if (tuna->len != sizeof(u8) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U8)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
+{
+ int ret;
+ struct ethtool_tunable tuna;
+ struct phy_device *phydev = dev->phydev;
+ void *data;
+
+ if (!(phydev && phydev->drv && phydev->drv->get_tunable))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+ return -EFAULT;
+ ret = ethtool_phy_tunable_valid(&tuna);
+ if (ret)
+ return ret;
+ data = kmalloc(tuna.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+ mutex_lock(&phydev->lock);
+ ret = phydev->drv->get_tunable(phydev, &tuna, data);
+ mutex_unlock(&phydev->lock);
+ if (ret)
+ goto out;
+ useraddr += sizeof(tuna);
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, data, tuna.len))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
+{
+ int ret;
+ struct ethtool_tunable tuna;
+ struct phy_device *phydev = dev->phydev;
+ void *data;
+
+ if (!(phydev && phydev->drv && phydev->drv->set_tunable))
+ return -EOPNOTSUPP;
+ if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+ return -EFAULT;
+ ret = ethtool_phy_tunable_valid(&tuna);
+ if (ret)
+ return ret;
+ data = kmalloc(tuna.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+ useraddr += sizeof(tuna);
+ ret = -EFAULT;
+ if (copy_from_user(data, useraddr, tuna.len))
+ goto out;
+ mutex_lock(&phydev->lock);
+ ret = phydev->drv->set_tunable(phydev, &tuna, data);
+ mutex_unlock(&phydev->lock);
+
+out:
+ kfree(data);
+ return ret;
+}
+
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2479,6 +2569,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GET_TS_INFO:
case ETHTOOL_GEEE:
case ETHTOOL_GTUNABLE:
+ case ETHTOOL_PHY_GTUNABLE:
case ETHTOOL_GLINKSETTINGS:
break;
default:
@@ -2685,6 +2776,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SLINKSETTINGS:
rc = ethtool_set_link_ksettings(dev, useraddr);
break;
+ case ETHTOOL_PHY_GTUNABLE:
+ rc = get_phy_tunable(dev, useraddr);
+ break;
+ case ETHTOOL_PHY_STUNABLE:
+ rc = set_phy_tunable(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
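
From userspace the new commands travel through the usual SIOCETHTOOL ioctl; a hedged sketch reading the downshift tunable (layout per the ethtool uapi as extended here; fd is an AF_INET datagram socket, error handling trimmed):

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int get_phy_downshift(int fd, const char *ifname, __u8 *count)
{
	union {
		struct ethtool_tunable t;
		char buf[sizeof(struct ethtool_tunable) + sizeof(__u8)];
	} req;
	struct ifreq ifr;

	memset(&req, 0, sizeof(req));
	req.t.cmd = ETHTOOL_PHY_GTUNABLE;
	req.t.id = ETHTOOL_PHY_DOWNSHIFT;
	req.t.type_id = ETHTOOL_TUNABLE_U8;
	req.t.len = sizeof(__u8);	/* checked by the kernel above */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&req;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return -1;

	/* the u8 payload follows the fixed header, as the
	 * copy_to_user() at useraddr + sizeof(tuna) above implies.
	 */
	*count = (__u8)req.buf[sizeof(struct ethtool_tunable)];
	return 0;
}
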
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index be4629c344a6..b6791d94841d 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -18,6 +18,11 @@
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
+static const struct fib_kuid_range fib_kuid_range_unset = {
+ KUIDT_INIT(0),
+ KUIDT_INIT(~0),
+};
+
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
{
@@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->table = table;
r->flags = flags;
r->fr_net = ops->fro_net;
+ r->uid_range = fib_kuid_range_unset;
r->suppress_prefixlen = -1;
r->suppress_ifgroup = -1;
@@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);
+static int uid_range_set(struct fib_kuid_range *range)
+{
+ return uid_valid(range->start) && uid_valid(range->end);
+}
+
+static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
+{
+ struct fib_rule_uid_range *in;
+ struct fib_kuid_range out;
+
+ in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);
+
+ out.start = make_kuid(current_user_ns(), in->start);
+ out.end = make_kuid(current_user_ns(), in->end);
+
+ return out;
+}
+
+static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
+{
+ struct fib_rule_uid_range out = {
+ from_kuid_munged(current_user_ns(), range->start),
+ from_kuid_munged(current_user_ns(), range->end)
+ };
+
+ return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -193,6 +227,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
goto out;
+ if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
+ uid_gt(fl->flowi_uid, rule->uid_range.end))
+ goto out;
+
ret = ops->match(rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -305,6 +343,10 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
if (r->l3mdev != rule->l3mdev)
continue;
+ if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
+ !uid_eq(r->uid_range.end, rule->uid_range.end))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -429,6 +471,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
if (rule->l3mdev && rule->table)
goto errout_free;
+ if (tb[FRA_UID_RANGE]) {
+ if (current_user_ns() != net->user_ns) {
+ err = -EPERM;
+ goto errout_free;
+ }
+
+ rule->uid_range = nla_get_kuid_range(tb);
+
+ if (!uid_range_set(&rule->uid_range) ||
+ !uid_lte(rule->uid_range.start, rule->uid_range.end))
+ goto errout_free;
+ } else {
+ rule->uid_range = fib_kuid_range_unset;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -497,6 +554,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *tmp;
struct nlattr *tb[FRA_MAX+1];
+ struct fib_kuid_range range;
int err = -EINVAL;
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
@@ -516,6 +574,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err < 0)
goto errout;
+ if (tb[FRA_UID_RANGE]) {
+ range = nla_get_kuid_range(tb);
+ if (!uid_range_set(&range))
+ goto errout;
+ } else {
+ range = fib_kuid_range_unset;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
if (frh->action && (frh->action != rule->action))
continue;
@@ -552,6 +618,11 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
(rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
continue;
+ if (uid_range_set(&range) &&
+ (!uid_eq(rule->uid_range.start, range.start) ||
+ !uid_eq(rule->uid_range.end, range.end)))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -619,7 +690,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4) /* FRA_FWMASK */
- + nla_total_size_64bit(8); /* FRA_TUN_ID */
+ + nla_total_size_64bit(8) /* FRA_TUN_ID */
+ + nla_total_size(sizeof(struct fib_kuid_range));
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -679,7 +751,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->tun_id &&
nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
(rule->l3mdev &&
- nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)))
+ nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
+ (uid_range_set(&rule->uid_range) &&
+ nla_put_uid_range(skb, &rule->uid_range)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
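
On the wire the range is one FRA_UID_RANGE attribute carrying the new uapi struct; a hedged userspace sketch of appending it to an RTM_NEWRULE message (struct and attribute id from <linux/fib_rules.h> as extended by this series; addattr_l() is the iproute2-style netlink helper, assumed available):

#include <linux/netlink.h>
#include <linux/fib_rules.h>

static int rule_add_uid_range(struct nlmsghdr *n, int maxlen,
			      __u32 start, __u32 end)
{
	struct fib_rule_uid_range r = { .start = start, .end = end };

	/* the kernel side rejects this unless current_user_ns()
	 * matches the netns owner and start <= end, per
	 * fib_nl_newrule() above.
	 */
	return addattr_l(n, maxlen, FRA_UID_RANGE, &r, sizeof(r));
}
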
diff --git a/net/core/filter.c b/net/core/filter.c
index b391209838ef..1969b3f118c1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -30,6 +30,7 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
+#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -39,7 +40,7 @@
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
@@ -78,6 +79,10 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
return -ENOMEM;
+ err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
+ if (err)
+ return err;
+
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
@@ -1684,6 +1689,12 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
u32 flags)
{
+ /* Verify that a link layer header is carried */
+ if (unlikely(skb->mac_header >= skb->network_header)) {
+ kfree_skb(skb);
+ return -ERANGE;
+ }
+
bpf_push_mac_rcsum(skb);
return flags & BPF_F_INGRESS ?
__bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
@@ -1692,17 +1703,10 @@ static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
u32 flags)
{
- switch (dev->type) {
- case ARPHRD_TUNNEL:
- case ARPHRD_TUNNEL6:
- case ARPHRD_SIT:
- case ARPHRD_IPGRE:
- case ARPHRD_VOID:
- case ARPHRD_NONE:
- return __bpf_redirect_no_mac(skb, dev, flags);
- default:
+ if (dev_is_mac_header_xmit(dev))
return __bpf_redirect_common(skb, dev, flags);
- }
+ else
+ return __bpf_redirect_no_mac(skb, dev, flags);
}
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2190,16 +2194,79 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
.arg3_type = ARG_ANYTHING,
};
-bool bpf_helper_changes_skb_data(void *func)
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ u32 max_len = __bpf_skb_max_len(skb);
+ u32 new_len = skb->len + head_room;
+ int ret;
+
+ if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
+ new_len < skb->len))
+ return -EINVAL;
+
+ ret = skb_cow(skb, head_room);
+ if (likely(!ret)) {
+ /* The idea for this helper is that we currently only
+ * allow expanding the mac header. This means that
+ * skb->protocol, network header, etc., stay as is.
+ * Compared to bpf_skb_change_tail(), we're more
+ * flexible since we don't need to linearize or
+ * reset GSO. The intent is for this helper to be
+ * used on an L3 skb that needs a mac header pushed
+ * for redirection into an L2 device.
+ */
+ __skb_push(skb, head_room);
+ memset(skb->data, 0, head_room);
+ skb_reset_mac_header(skb);
+ }
+
+ bpf_compute_data_end(skb);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_change_head_proto = {
+ .func = bpf_skb_change_head,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
+{
+ void *data = xdp->data + offset;
+
+ if (unlikely(data < xdp->data_hard_start ||
+ data > xdp->data_end - ETH_HLEN))
+ return -EINVAL;
+
+ xdp->data = data;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
+ .func = bpf_xdp_adjust_head,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
func == bpf_skb_vlan_pop ||
func == bpf_skb_store_bytes ||
func == bpf_skb_change_proto ||
+ func == bpf_skb_change_head ||
func == bpf_skb_change_tail ||
func == bpf_skb_pull_data ||
func == bpf_l3_csum_replace ||
- func == bpf_l4_csum_replace)
+ func == bpf_l4_csum_replace ||
+ func == bpf_xdp_adjust_head)
return true;
return false;
@@ -2544,6 +2611,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_raw_smp_processor_id_proto;
+ case BPF_FUNC_get_numa_node_id:
+ return &bpf_get_numa_node_id_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
case BPF_FUNC_ktime_get_ns:
@@ -2623,12 +2692,87 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_xdp_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_xdp_adjust_head:
+ return &bpf_xdp_adjust_head_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+lwt_inout_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_skb_pull_data:
+ return &bpf_skb_pull_data_proto;
+ case BPF_FUNC_csum_diff:
+ return &bpf_csum_diff_proto;
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_proto;
+ case BPF_FUNC_get_route_realm:
+ return &bpf_get_route_realm_proto;
+ case BPF_FUNC_get_hash_recalc:
+ return &bpf_get_hash_recalc_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_skb_event_output_proto;
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_skb_under_cgroup:
+ return &bpf_skb_under_cgroup_proto;
default:
return sk_filter_func_proto(func_id);
}
}
-static bool __is_valid_access(int off, int size, enum bpf_access_type type)
+static const struct bpf_func_proto *
+lwt_xmit_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_get_tunnel_key:
+ return &bpf_skb_get_tunnel_key_proto;
+ case BPF_FUNC_skb_set_tunnel_key:
+ return bpf_get_skb_set_tunnel_proto(func_id);
+ case BPF_FUNC_skb_get_tunnel_opt:
+ return &bpf_skb_get_tunnel_opt_proto;
+ case BPF_FUNC_skb_set_tunnel_opt:
+ return bpf_get_skb_set_tunnel_proto(func_id);
+ case BPF_FUNC_redirect:
+ return &bpf_redirect_proto;
+ case BPF_FUNC_clone_redirect:
+ return &bpf_clone_redirect_proto;
+ case BPF_FUNC_skb_change_tail:
+ return &bpf_skb_change_tail_proto;
+ case BPF_FUNC_skb_change_head:
+ return &bpf_skb_change_head_proto;
+ case BPF_FUNC_skb_store_bytes:
+ return &bpf_skb_store_bytes_proto;
+ case BPF_FUNC_csum_update:
+ return &bpf_csum_update_proto;
+ case BPF_FUNC_l3_csum_replace:
+ return &bpf_l3_csum_replace_proto;
+ case BPF_FUNC_l4_csum_replace:
+ return &bpf_l4_csum_replace_proto;
+ case BPF_FUNC_set_hash_invalid:
+ return &bpf_set_hash_invalid_proto;
+ default:
+ return lwt_inout_func_proto(func_id);
+ }
+}
+
+static bool __is_valid_access(int off, int size)
{
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
@@ -2662,7 +2806,64 @@ static bool sk_filter_is_valid_access(int off, int size,
}
}
- return __is_valid_access(off, size, type);
+ return __is_valid_access(off, size);
+}
+
+static bool lwt_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ switch (off) {
+ case offsetof(struct __sk_buff, tc_classid):
+ return false;
+ }
+
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct __sk_buff, mark):
+ case offsetof(struct __sk_buff, priority):
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ switch (off) {
+ case offsetof(struct __sk_buff, data):
+ *reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct __sk_buff, data_end):
+ *reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ return __is_valid_access(off, size);
+}
+
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ if (off < 0 || off + size > sizeof(struct bpf_sock))
+ return false;
+ /* The verifier guarantees that size > 0. */
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u32))
+ return false;
+
+ return true;
}
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -2731,11 +2932,10 @@ static bool tc_cls_act_is_valid_access(int off, int size,
break;
}
- return __is_valid_access(off, size, type);
+ return __is_valid_access(off, size);
}
-static bool __is_valid_xdp_access(int off, int size,
- enum bpf_access_type type)
+static bool __is_valid_xdp_access(int off, int size)
{
if (off < 0 || off >= sizeof(struct xdp_md))
return false;
@@ -2763,7 +2963,7 @@ static bool xdp_is_valid_access(int off, int size,
break;
}
- return __is_valid_xdp_access(off, size, type);
+ return __is_valid_xdp_access(off, size);
}
void bpf_warn_invalid_xdp_action(u32 act)
@@ -2923,6 +3123,51 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
return insn - insn_buf;
}
+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
+ int dst_reg, int src_reg,
+ int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ break;
+
+ case offsetof(struct bpf_sock, family):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sock, sk_family));
+ break;
+
+ case offsetof(struct bpf_sock, type):
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, __sk_flags_offset));
+ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT);
+ break;
+
+ case offsetof(struct bpf_sock, protocol):
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, __sk_flags_offset));
+ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
@@ -2990,6 +3235,31 @@ static const struct bpf_verifier_ops xdp_ops = {
.convert_ctx_access = xdp_convert_ctx_access,
};
+static const struct bpf_verifier_ops cg_skb_ops = {
+ .get_func_proto = cg_skb_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
+static const struct bpf_verifier_ops lwt_inout_ops = {
+ .get_func_proto = lwt_inout_func_proto,
+ .is_valid_access = lwt_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
+static const struct bpf_verifier_ops lwt_xmit_ops = {
+ .get_func_proto = lwt_xmit_func_proto,
+ .is_valid_access = lwt_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
+ .gen_prologue = tc_cls_act_prologue,
+};
+
+static const struct bpf_verifier_ops cg_sock_ops = {
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sock_filter_is_valid_access,
+ .convert_ctx_access = sock_filter_convert_ctx_access,
+};
+
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
.ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3010,12 +3280,42 @@ static struct bpf_prog_type_list xdp_type __read_mostly = {
.type = BPF_PROG_TYPE_XDP,
};
+static struct bpf_prog_type_list cg_skb_type __read_mostly = {
+ .ops = &cg_skb_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SKB,
+};
+
+static struct bpf_prog_type_list lwt_in_type __read_mostly = {
+ .ops = &lwt_inout_ops,
+ .type = BPF_PROG_TYPE_LWT_IN,
+};
+
+static struct bpf_prog_type_list lwt_out_type __read_mostly = {
+ .ops = &lwt_inout_ops,
+ .type = BPF_PROG_TYPE_LWT_OUT,
+};
+
+static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
+ .ops = &lwt_xmit_ops,
+ .type = BPF_PROG_TYPE_LWT_XMIT,
+};
+
+static struct bpf_prog_type_list cg_sock_type __read_mostly = {
+ .ops = &cg_sock_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SOCK
+};
+
static int __init register_sk_filter_ops(void)
{
bpf_register_prog_type(&sk_filter_type);
bpf_register_prog_type(&sched_cls_type);
bpf_register_prog_type(&sched_act_type);
bpf_register_prog_type(&xdp_type);
+ bpf_register_prog_type(&cg_skb_type);
+ bpf_register_prog_type(&cg_sock_type);
+ bpf_register_prog_type(&lwt_in_type);
+ bpf_register_prog_type(&lwt_out_type);
+ bpf_register_prog_type(&lwt_xmit_type);
return 0;
}
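
On the program side the new helper is used like any other; a hedged restricted-C sketch of an XDP program that grows headroom with bpf_xdp_adjust_head() (the helper declaration mirrors BPF_FUNC_xdp_adjust_head from this series; clang -target bpf assumed):

#include <linux/bpf.h>
#include <linux/if_ether.h>

static int (*bpf_xdp_adjust_head)(void *ctx, int delta) =
	(void *)BPF_FUNC_xdp_adjust_head;

__attribute__((section("prog"), used))
int xdp_push_eth(struct xdp_md *ctx)
{
	void *data, *data_end;
	struct ethhdr *eth;

	/* a negative delta moves xdp->data towards data_hard_start,
	 * i.e. reserves room in front of the frame.
	 */
	if (bpf_xdp_adjust_head(ctx, -(int)sizeof(*eth)))
		return XDP_DROP;

	/* reload pointers: the helper moved xdp->data */
	data = (void *)(long)ctx->data;
	data_end = (void *)(long)ctx->data_end;
	if (data + sizeof(*eth) > data_end)
		return XDP_DROP;

	eth = data;
	__builtin_memset(eth, 0, sizeof(*eth));
	/* fill h_dest/h_source/h_proto as the use case requires */
	return XDP_PASS;
}
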
diff --git a/net/core/flow.c b/net/core/flow.c
index 18e8893d4be5..f765c11d8df5 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -417,28 +417,20 @@ static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
return 0;
}
-static int flow_cache_cpu(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
{
- struct flow_cache *fc = container_of(nfb, struct flow_cache,
- hotcpu_notifier);
- int res, cpu = (unsigned long) hcpu;
+ struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
+
+ return flow_cache_cpu_prepare(fc, cpu);
+}
+
+static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+ struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- res = flow_cache_cpu_prepare(fc, cpu);
- if (res)
- return notifier_from_errno(res);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- __flow_cache_shrink(fc, fcp, 0);
- break;
- }
- return NOTIFY_OK;
+ __flow_cache_shrink(fc, fcp, 0);
+ return 0;
}
int flow_cache_init(struct net *net)
@@ -465,18 +457,8 @@ int flow_cache_init(struct net *net)
if (!fc->percpu)
return -ENOMEM;
- cpu_notifier_register_begin();
-
- for_each_online_cpu(i) {
- if (flow_cache_cpu_prepare(fc, i))
- goto err;
- }
- fc->hotcpu_notifier = (struct notifier_block){
- .notifier_call = flow_cache_cpu,
- };
- __register_hotcpu_notifier(&fc->hotcpu_notifier);
-
- cpu_notifier_register_done();
+ if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
+ goto err;
setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
(unsigned long) fc);
@@ -492,8 +474,6 @@ err:
fcp->hash_table = NULL;
}
- cpu_notifier_register_done();
-
free_percpu(fc->percpu);
fc->percpu = NULL;
@@ -507,7 +487,8 @@ void flow_cache_fini(struct net *net)
struct flow_cache *fc = &net->xfrm.flow_cache_global;
del_timer_sync(&fc->rnd_timer);
- unregister_hotcpu_notifier(&fc->hotcpu_notifier);
+
+ cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
for_each_possible_cpu(i) {
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
@@ -519,3 +500,14 @@ void flow_cache_fini(struct net *net)
fc->percpu = NULL;
}
EXPORT_SYMBOL(flow_cache_fini);
+
+void __init flow_cache_hp_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
+ "net/flow:prepare",
+ flow_cache_cpu_up_prep,
+ flow_cache_cpu_dead);
+ WARN_ON(ret < 0);
+}
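
flow.c uses the multi-instance flavour of the same hotplug API: the state is registered once at boot (flow_cache_hp_init() above), and every flow cache then enrolls its embedded hlist_node so the callbacks run per (cpu, instance) pair. A hedged sketch of the pattern:

#include <linux/cpuhotplug.h>

struct example_cache {
	struct hlist_node node;		/* cpuhp instance linkage */
	/* per-cpu tables ... */
};

static int example_up_prep(unsigned int cpu, struct hlist_node *node)
{
	struct example_cache *c =
		hlist_entry_safe(node, struct example_cache, node);

	/* allocate c's per-cpu state for @cpu; a nonzero return
	 * aborts the CPU bringup.
	 */
	return c ? 0 : -EINVAL;
}

static int example_dead(unsigned int cpu, struct hlist_node *node)
{
	/* shrink/free the instance's per-cpu state after @cpu went down */
	return 0;
}

/* once at boot:
 *	cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE, "net/flow:prepare",
 *				example_up_prep, example_dead);
 * per instance:
 *	cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &c->node);
 */
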
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c6d8207ffa7e..1b7673aac59d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -58,6 +58,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
EXPORT_SYMBOL(skb_flow_dissector_init);
/**
+ * skb_flow_get_be16 - extract be16 entity
+ * @skb: sk_buff to extract from
+ * @poff: offset to extract at
+ * @data: raw buffer pointer to the packet
+ * @hlen: packet header length
+ *
+ * The function will try to retrieve a be16 entity at
+ * offset poff.
+ */
+static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
+ void *data, int hlen)
+{
+ __be16 *u, _u;
+
+ u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
+ if (u)
+ return *u;
+
+ return 0;
+}
+
+/**
* __skb_flow_get_ports - extract the upper layer ports and return them
* @skb: sk_buff to extract the ports from
* @thoff: transport header offset
@@ -117,6 +139,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
+ struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
struct flow_dissector_key_keyid *key_keyid;
@@ -445,8 +468,9 @@ ip_proto_again:
if (hdr->flags & GRE_ACK)
offset += sizeof(((struct pptp_gre_header *)0)->ack);
- ppp_hdr = skb_header_pointer(skb, nhoff + offset,
- sizeof(_ppp_hdr), _ppp_hdr);
+ ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
+ sizeof(_ppp_hdr),
+ data, hlen, _ppp_hdr);
if (!ppp_hdr)
goto out_bad;
@@ -546,6 +570,14 @@ ip_proto_again:
data, hlen);
}
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP)) {
+ key_icmp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP,
+ target_container);
+ key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
+ }
+
out_good:
ret = true;
@@ -726,7 +758,7 @@ EXPORT_SYMBOL(make_flow_keys_digest);
static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
-u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
{
struct flow_keys keys;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cad8e791f28e..0385dece1f6f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -7,13 +7,14 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Eric Dumazet <edumazet@google.com>
*
* Changes:
* Jamal Hadi Salim - moved it to net/core and reshuffled
* names to make it usable in general net subsystem.
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -30,165 +31,79 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
-#include <linux/rbtree.h>
#include <linux/slab.h>
+#include <linux/seqlock.h>
#include <net/sock.h>
#include <net/gen_stats.h>
-/*
- This code is NOT intended to be used for statistics collection,
- its purpose is to provide a base for statistical multiplexing
- for controlled load service.
- If you need only statistics, run a user level daemon which
- periodically reads byte counters.
-
- Unfortunately, rate estimation is not a very easy task.
- F.e. I did not find a simple way to estimate the current peak rate
- and even failed to formulate the problem 8)8)
-
- So I preferred not to build an estimator into the scheduler,
- but run this task separately.
- Ideally, it should be kernel thread(s), but for now it runs
- from timers, which puts apparent top bounds on the number of rated
- flows, has minimal overhead on small, but is enough
- to handle controlled load service, sets of aggregates.
-
- We measure rate over A=(1<<interval) seconds and evaluate EWMA:
-
- avrate = avrate*(1-W) + rate*W
-
- where W is chosen as negative power of 2: W = 2^(-ewma_log)
-
- The resulting time constant is:
-
- T = A/(-ln(1-W))
-
-
- NOTES.
-
- * avbps and avpps are scaled by 2^5.
- * both values are reported as 32 bit unsigned values. bps can
- overflow for fast links : max speed being 34360Mbit/sec
- * Minimal interval is HZ/4=250msec (it is the greatest common divisor
- for HZ=100 and HZ=1024 8)), maximal interval
- is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
- are too expensive, longer ones can be implemented
- at user level painlessly.
+/* This code is NOT intended to be used for statistics collection,
+ * its purpose is to provide a base for statistical multiplexing
+ * for controlled load service.
+ * If you need only statistics, run a user level daemon which
+ * periodically reads byte counters.
*/
-#define EST_MAX_INTERVAL 5
-
-struct gen_estimator
-{
- struct list_head list;
+struct net_rate_estimator {
struct gnet_stats_basic_packed *bstats;
- struct gnet_stats_rate_est64 *rate_est;
spinlock_t *stats_lock;
seqcount_t *running;
- int ewma_log;
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ u8 ewma_log;
+ u8 intvl_log; /* period : (250ms << intvl_log) */
+
+ seqcount_t seq;
u32 last_packets;
- unsigned long avpps;
u64 last_bytes;
+
+ u64 avpps;
u64 avbps;
- struct rcu_head e_rcu;
- struct rb_node node;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
- struct rcu_head head;
-};
-struct gen_estimator_head
-{
- struct timer_list timer;
- struct list_head list;
+ unsigned long next_jiffies;
+ struct timer_list timer;
+ struct rcu_head rcu;
};
-static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
-
-/* Protects against NULL dereference */
-static DEFINE_RWLOCK(est_lock);
-
-/* Protects against soft lockup during large deletion */
-static struct rb_root est_root = RB_ROOT;
-static DEFINE_SPINLOCK(est_tree_lock);
-
-static void est_timer(unsigned long arg)
+static void est_fetch_counters(struct net_rate_estimator *e,
+ struct gnet_stats_basic_packed *b)
{
- int idx = (int)arg;
- struct gen_estimator *e;
+ if (e->stats_lock)
+ spin_lock(e->stats_lock);
- rcu_read_lock();
- list_for_each_entry_rcu(e, &elist[idx].list, list) {
- struct gnet_stats_basic_packed b = {0};
- unsigned long rate;
- u64 brate;
-
- if (e->stats_lock)
- spin_lock(e->stats_lock);
- read_lock(&est_lock);
- if (e->bstats == NULL)
- goto skip;
-
- __gnet_stats_copy_basic(e->running, &b, e->cpu_bstats, e->bstats);
-
- brate = (b.bytes - e->last_bytes)<<(7 - idx);
- e->last_bytes = b.bytes;
- e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
- WRITE_ONCE(e->rate_est->bps, (e->avbps + 0xF) >> 5);
-
- rate = b.packets - e->last_packets;
- rate <<= (7 - idx);
- e->last_packets = b.packets;
- e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
- WRITE_ONCE(e->rate_est->pps, (e->avpps + 0xF) >> 5);
-skip:
- read_unlock(&est_lock);
- if (e->stats_lock)
- spin_unlock(e->stats_lock);
- }
+ __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
+
+ if (e->stats_lock)
+ spin_unlock(e->stats_lock);
- if (!list_empty(&elist[idx].list))
- mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
- rcu_read_unlock();
}
-static void gen_add_node(struct gen_estimator *est)
+static void est_timer(unsigned long arg)
{
- struct rb_node **p = &est_root.rb_node, *parent = NULL;
+ struct net_rate_estimator *est = (struct net_rate_estimator *)arg;
+ struct gnet_stats_basic_packed b;
+ u64 rate, brate;
- while (*p) {
- struct gen_estimator *e;
+ est_fetch_counters(est, &b);
+ brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log);
+ brate -= (est->avbps >> est->ewma_log);
- parent = *p;
- e = rb_entry(parent, struct gen_estimator, node);
+ rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log);
+ rate -= (est->avpps >> est->ewma_log);
- if (est->bstats > e->bstats)
- p = &parent->rb_right;
- else
- p = &parent->rb_left;
- }
- rb_link_node(&est->node, parent, p);
- rb_insert_color(&est->node, &est_root);
-}
+ write_seqcount_begin(&est->seq);
+ est->avbps += brate;
+ est->avpps += rate;
+ write_seqcount_end(&est->seq);
-static
-struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
- const struct gnet_stats_rate_est64 *rate_est)
-{
- struct rb_node *p = est_root.rb_node;
-
- while (p) {
- struct gen_estimator *e;
+ est->last_bytes = b.bytes;
+ est->last_packets = b.packets;
- e = rb_entry(p, struct gen_estimator, node);
+ est->next_jiffies += ((HZ/4) << est->intvl_log);
- if (bstats > e->bstats)
- p = p->rb_right;
- else if (bstats < e->bstats || rate_est != e->rate_est)
- p = p->rb_left;
- else
- return e;
+ if (unlikely(time_after_eq(jiffies, est->next_jiffies))) {
+ /* Ouch... timer was delayed. */
+ est->next_jiffies = jiffies + 1;
}
- return NULL;
+ mod_timer(&est->timer, est->next_jiffies);
}
/**
@@ -211,83 +126,76 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
*/
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
- struct gnet_stats_rate_est64 *rate_est,
+ struct net_rate_estimator __rcu **rate_est,
spinlock_t *stats_lock,
seqcount_t *running,
struct nlattr *opt)
{
- struct gen_estimator *est;
struct gnet_estimator *parm = nla_data(opt);
- struct gnet_stats_basic_packed b = {0};
- int idx;
+ struct net_rate_estimator *old, *est;
+ struct gnet_stats_basic_packed b;
+ int intvl_log;
if (nla_len(opt) < sizeof(*parm))
return -EINVAL;
+ /* allowed timer periods are:
+ * -2 : 250ms, -1 : 500ms, 0 : 1 sec
+ * 1 : 2 sec, 2 : 4 sec, 3 : 8 sec
+ */
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
est = kzalloc(sizeof(*est), GFP_KERNEL);
- if (est == NULL)
+ if (!est)
return -ENOBUFS;
- __gnet_stats_copy_basic(running, &b, cpu_bstats, bstats);
-
- idx = parm->interval + 2;
+ seqcount_init(&est->seq);
+ intvl_log = parm->interval + 2;
est->bstats = bstats;
- est->rate_est = rate_est;
est->stats_lock = stats_lock;
est->running = running;
est->ewma_log = parm->ewma_log;
- est->last_bytes = b.bytes;
- est->avbps = rate_est->bps<<5;
- est->last_packets = b.packets;
- est->avpps = rate_est->pps<<10;
+ est->intvl_log = intvl_log;
est->cpu_bstats = cpu_bstats;
- spin_lock_bh(&est_tree_lock);
- if (!elist[idx].timer.function) {
- INIT_LIST_HEAD(&elist[idx].list);
- setup_timer(&elist[idx].timer, est_timer, idx);
+ est_fetch_counters(est, &b);
+ est->last_bytes = b.bytes;
+ est->last_packets = b.packets;
+ old = rcu_dereference_protected(*rate_est, 1);
+ if (old) {
+ del_timer_sync(&old->timer);
+ est->avbps = old->avbps;
+ est->avpps = old->avpps;
}
- if (list_empty(&elist[idx].list))
- mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
-
- list_add_rcu(&est->list, &elist[idx].list);
- gen_add_node(est);
- spin_unlock_bh(&est_tree_lock);
+ est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
+ setup_timer(&est->timer, est_timer, (unsigned long)est);
+ mod_timer(&est->timer, est->next_jiffies);
+ rcu_assign_pointer(*rate_est, est);
+ if (old)
+ kfree_rcu(old, rcu);
return 0;
}
EXPORT_SYMBOL(gen_new_estimator);
/**
* gen_kill_estimator - remove a rate estimator
- * @bstats: basic statistics
- * @rate_est: rate estimator statistics
+ * @rate_est: rate estimator
*
- * Removes the rate estimator specified by &bstats and &rate_est.
+ * Removes the rate estimator.
*
- * Note : Caller should respect an RCU grace period before freeing stats_lock
*/
-void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_rate_est64 *rate_est)
+void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est)
{
- struct gen_estimator *e;
-
- spin_lock_bh(&est_tree_lock);
- while ((e = gen_find_node(bstats, rate_est))) {
- rb_erase(&e->node, &est_root);
+ struct net_rate_estimator *est;
- write_lock(&est_lock);
- e->bstats = NULL;
- write_unlock(&est_lock);
-
- list_del_rcu(&e->list);
- kfree_rcu(e, e_rcu);
+ est = xchg((__force struct net_rate_estimator **)rate_est, NULL);
+ if (est) {
+ del_timer_sync(&est->timer);
+ kfree_rcu(est, rcu);
}
- spin_unlock_bh(&est_tree_lock);
}
EXPORT_SYMBOL(gen_kill_estimator);
@@ -307,33 +215,47 @@ EXPORT_SYMBOL(gen_kill_estimator);
*/
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
- struct gnet_stats_rate_est64 *rate_est,
+ struct net_rate_estimator __rcu **rate_est,
spinlock_t *stats_lock,
seqcount_t *running, struct nlattr *opt)
{
- gen_kill_estimator(bstats, rate_est);
- return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt);
+ return gen_new_estimator(bstats, cpu_bstats, rate_est,
+ stats_lock, running, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);
/**
* gen_estimator_active - test if estimator is currently in use
- * @bstats: basic statistics
- * @rate_est: rate estimator statistics
+ * @rate_est: rate estimator
*
* Returns true if estimator is active, and false if not.
*/
-bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
- const struct gnet_stats_rate_est64 *rate_est)
+bool gen_estimator_active(struct net_rate_estimator __rcu **rate_est)
{
- bool res;
+ return !!rcu_access_pointer(*rate_est);
+}
+EXPORT_SYMBOL(gen_estimator_active);
- ASSERT_RTNL();
+bool gen_estimator_read(struct net_rate_estimator __rcu **rate_est,
+ struct gnet_stats_rate_est64 *sample)
+{
+ struct net_rate_estimator *est;
+ unsigned seq;
+
+ rcu_read_lock();
+ est = rcu_dereference(*rate_est);
+ if (!est) {
+ rcu_read_unlock();
+ return false;
+ }
- spin_lock_bh(&est_tree_lock);
- res = gen_find_node(bstats, rate_est) != NULL;
- spin_unlock_bh(&est_tree_lock);
+ do {
+ seq = read_seqcount_begin(&est->seq);
+ sample->bps = est->avbps >> 8;
+ sample->pps = est->avpps >> 8;
+ } while (read_seqcount_retry(&est->seq, seq));
- return res;
+ rcu_read_unlock();
+ return true;
}
-EXPORT_SYMBOL(gen_estimator_active);
+EXPORT_SYMBOL(gen_estimator_read);
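
The fixed-point arithmetic above keeps avbps/avpps scaled by 2^8 so the whole EWMA needs only shifts; a hedged standalone restatement (assumes ewma_log <= 8, as the shifts above do):

/* avg tracks the EWMA of (delta << 8) with weight W = 2^-ewma_log:
 *
 *	avg' = avg * (1 - W) + (delta << 8) * W
 *	     = avg + (delta << (8 - ewma_log)) - (avg >> ewma_log)
 *
 * which is why gen_estimator_read() returns avg >> 8.
 */
static u64 ewma_step(u64 avg, u64 delta, unsigned int ewma_log)
{
	return avg + (delta << (8 - ewma_log)) - (avg >> ewma_log);
}
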
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 508e051304fb..87f28557b329 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -194,8 +194,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
/**
* gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
* @d: dumping handle
- * @b: basic statistics
- * @r: rate estimator statistics
+ * @rate_est: rate estimator
*
* Appends the rate estimator statistics to the top level TLV created by
* gnet_stats_start_copy().
@@ -205,18 +204,17 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
*/
int
gnet_stats_copy_rate_est(struct gnet_dump *d,
- const struct gnet_stats_basic_packed *b,
- struct gnet_stats_rate_est64 *r)
+ struct net_rate_estimator __rcu **rate_est)
{
+ struct gnet_stats_rate_est64 sample;
struct gnet_stats_rate_est est;
int res;
- if (b && !gen_estimator_active(b, r))
+ if (!gen_estimator_read(rate_est, &sample))
return 0;
-
- est.bps = min_t(u64, UINT_MAX, r->bps);
+ est.bps = min_t(u64, UINT_MAX, sample.bps);
/* we have some time before reaching 2^32 packets per second */
- est.pps = r->pps;
+ est.pps = sample.pps;
if (d->compat_tc_stats) {
d->tc_stats.bps = est.bps;
@@ -226,11 +224,11 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
if (d->tail) {
res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est),
TCA_STATS_PAD);
- if (res < 0 || est.bps == r->bps)
+ if (res < 0 || est.bps == sample.bps)
return res;
/* emit 64bit stats only if needed */
- return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r),
- TCA_STATS_PAD);
+ return gnet_stats_copy(d, TCA_STATS_RATE_EST64, &sample,
+ sizeof(sample), TCA_STATS_PAD);
}
return 0;
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
new file mode 100644
index 000000000000..b3eef90b2df9
--- /dev/null
+++ b/net/core/lwt_bpf.c
@@ -0,0 +1,397 @@
+/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <net/lwtunnel.h>
+
+struct bpf_lwt_prog {
+ struct bpf_prog *prog;
+ char *name;
+};
+
+struct bpf_lwt {
+ struct bpf_lwt_prog in;
+ struct bpf_lwt_prog out;
+ struct bpf_lwt_prog xmit;
+ int family;
+};
+
+#define MAX_PROG_NAME 256
+
+static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt)
+{
+ return (struct bpf_lwt *)lwt->data;
+}
+
+#define NO_REDIRECT false
+#define CAN_REDIRECT true
+
+static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
+ struct dst_entry *dst, bool can_redirect)
+{
+ int ret;
+
+ /* Disabling preemption is needed to protect the per-cpu
+ * redirect_info between the BPF prog and skb_do_redirect().
+ * The call_rcu in bpf_prog_put() and access to maps strictly
+ * require rcu_read_lock() for protection; mixing this with the
+ * BH RCU lock doesn't work.
+ */
+ preempt_disable();
+ rcu_read_lock();
+ bpf_compute_data_end(skb);
+ ret = bpf_prog_run_save_cb(lwt->prog, skb);
+ rcu_read_unlock();
+
+ switch (ret) {
+ case BPF_OK:
+ break;
+
+ case BPF_REDIRECT:
+ if (unlikely(!can_redirect)) {
+ pr_warn_once("Illegal redirect return code in prog %s\n",
+ lwt->name ? : "<unknown>");
+ ret = BPF_OK;
+ } else {
+ ret = skb_do_redirect(skb);
+ if (ret == 0)
+ ret = BPF_REDIRECT;
+ }
+ break;
+
+ case BPF_DROP:
+ kfree_skb(skb);
+ ret = -EPERM;
+ break;
+
+ default:
+ pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret);
+ kfree_skb(skb);
+ ret = -EINVAL;
+ break;
+ }
+
+ preempt_enable();
+
+ return ret;
+}
+
+static int bpf_input(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct bpf_lwt *bpf;
+ int ret;
+
+ bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+ if (bpf->in.prog) {
+ ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (unlikely(!dst->lwtstate->orig_input)) {
+ pr_warn_once("orig_input not set on dst for prog %s\n",
+ bpf->in.name);
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return dst->lwtstate->orig_input(skb);
+}
+
+static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct bpf_lwt *bpf;
+ int ret;
+
+ bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+ if (bpf->out.prog) {
+ ret = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (unlikely(!dst->lwtstate->orig_output)) {
+ pr_warn_once("orig_output not set on dst for prog %s\n",
+ bpf->out.name);
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return dst->lwtstate->orig_output(net, sk, skb);
+}
+
+static int xmit_check_hhlen(struct sk_buff *skb)
+{
+ int hh_len = skb_dst(skb)->dev->hard_header_len;
+
+ if (skb_headroom(skb) < hh_len) {
+ int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
+
+ if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int bpf_xmit(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct bpf_lwt *bpf;
+
+ bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+ if (bpf->xmit.prog) {
+ int ret;
+
+ ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
+ switch (ret) {
+ case BPF_OK:
+			/* If the program changed the header, the headroom
+			 * might now be too small for the L2 header still to
+			 * be added; expand as needed.
+			 */
+ ret = xmit_check_hhlen(skb);
+ if (unlikely(ret))
+ return ret;
+
+ return LWTUNNEL_XMIT_CONTINUE;
+ case BPF_REDIRECT:
+ return LWTUNNEL_XMIT_DONE;
+ default:
+ return ret;
+ }
+ }
+
+ return LWTUNNEL_XMIT_CONTINUE;
+}
+
+static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog)
+{
+ if (prog->prog)
+ bpf_prog_put(prog->prog);
+
+ kfree(prog->name);
+}
+
+static void bpf_destroy_state(struct lwtunnel_state *lwt)
+{
+ struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);
+
+ bpf_lwt_prog_destroy(&bpf->in);
+ bpf_lwt_prog_destroy(&bpf->out);
+ bpf_lwt_prog_destroy(&bpf->xmit);
+}
+
+static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = {
+ [LWT_BPF_PROG_FD] = { .type = NLA_U32, },
+ [LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
+ .len = MAX_PROG_NAME },
+};
+
+static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
+ enum bpf_prog_type type)
+{
+ struct nlattr *tb[LWT_BPF_PROG_MAX + 1];
+ struct bpf_prog *p;
+ int ret;
+ u32 fd;
+
+ ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
+ return -EINVAL;
+
+ prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL);
+ if (!prog->name)
+ return -ENOMEM;
+
+ fd = nla_get_u32(tb[LWT_BPF_PROG_FD]);
+ p = bpf_prog_get_type(fd, type);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ prog->prog = p;
+
+ return 0;
+}
+
+static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
+ [LWT_BPF_IN] = { .type = NLA_NESTED, },
+ [LWT_BPF_OUT] = { .type = NLA_NESTED, },
+ [LWT_BPF_XMIT] = { .type = NLA_NESTED, },
+ [LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 },
+};
+
+static int bpf_build_state(struct net_device *dev, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts)
+{
+ struct nlattr *tb[LWT_BPF_MAX + 1];
+ struct lwtunnel_state *newts;
+ struct bpf_lwt *bpf;
+ int ret;
+
+ if (family != AF_INET && family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT])
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(sizeof(*bpf));
+ if (!newts)
+ return -ENOMEM;
+
+ newts->type = LWTUNNEL_ENCAP_BPF;
+ bpf = bpf_lwt_lwtunnel(newts);
+
+ if (tb[LWT_BPF_IN]) {
+ newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
+ ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in,
+ BPF_PROG_TYPE_LWT_IN);
+ if (ret < 0)
+ goto errout;
+ }
+
+ if (tb[LWT_BPF_OUT]) {
+ newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+ ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out,
+ BPF_PROG_TYPE_LWT_OUT);
+ if (ret < 0)
+ goto errout;
+ }
+
+ if (tb[LWT_BPF_XMIT]) {
+ newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
+ ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit,
+ BPF_PROG_TYPE_LWT_XMIT);
+ if (ret < 0)
+ goto errout;
+ }
+
+ if (tb[LWT_BPF_XMIT_HEADROOM]) {
+ u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]);
+
+ if (headroom > LWT_BPF_MAX_HEADROOM) {
+ ret = -ERANGE;
+ goto errout;
+ }
+
+ newts->headroom = headroom;
+ }
+
+ bpf->family = family;
+ *ts = newts;
+
+ return 0;
+
+errout:
+ bpf_destroy_state(newts);
+ kfree(newts);
+ return ret;
+}
+
+static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr,
+ struct bpf_lwt_prog *prog)
+{
+ struct nlattr *nest;
+
+ if (!prog->prog)
+ return 0;
+
+ nest = nla_nest_start(skb, attr);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (prog->name &&
+ nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name))
+ return -EMSGSIZE;
+
+ return nla_nest_end(skb, nest);
+}
+
+static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt)
+{
+ struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);
+
+ if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0 ||
+ bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0 ||
+ bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ int nest_len = nla_total_size(sizeof(struct nlattr)) +
+ nla_total_size(MAX_PROG_NAME) + /* LWT_BPF_PROG_NAME */
+ 0;
+
+ return nest_len + /* LWT_BPF_IN */
+ nest_len + /* LWT_BPF_OUT */
+ nest_len + /* LWT_BPF_XMIT */
+ 0;
+}
+
+int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
+{
+	/* FIXME:
+	 * The LWT state is currently rebuilt for delete requests, which
+	 * results in a new bpf_prog instance. Compare names for now.
+	 */
+ if (!a->name && !b->name)
+ return 0;
+
+ if (!a->name || !b->name)
+ return 1;
+
+ return strcmp(a->name, b->name);
+}
+
+static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct bpf_lwt *a_bpf = bpf_lwt_lwtunnel(a);
+ struct bpf_lwt *b_bpf = bpf_lwt_lwtunnel(b);
+
+ return bpf_lwt_prog_cmp(&a_bpf->in, &b_bpf->in) ||
+ bpf_lwt_prog_cmp(&a_bpf->out, &b_bpf->out) ||
+ bpf_lwt_prog_cmp(&a_bpf->xmit, &b_bpf->xmit);
+}
+
+static const struct lwtunnel_encap_ops bpf_encap_ops = {
+ .build_state = bpf_build_state,
+ .destroy_state = bpf_destroy_state,
+ .input = bpf_input,
+ .output = bpf_output,
+ .xmit = bpf_xmit,
+ .fill_encap = bpf_fill_encap_info,
+ .get_encap_size = bpf_encap_nlsize,
+ .cmp_encap = bpf_encap_cmp,
+ .owner = THIS_MODULE,
+};
+
+static int __init bpf_lwt_init(void)
+{
+ return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
+}
+
+subsys_initcall(bpf_lwt_init)
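
For context, a minimal sketch of a program that could be loaded through LWT_BPF_PROG_FD and run at the xmit hook above; the section name and the length check are illustrative assumptions, not part of this patch:

/* Minimal LWT xmit program (sketch; compile with clang -target bpf).
 * The "xmit" section name is a loader convention, not a kernel one. */
#include <linux/bpf.h>

#define __section(NAME) __attribute__((section(NAME), used))

__section("xmit")
int lwt_xmit_prog(struct __sk_buff *skb)
{
	/* Drop runts; pass everything else to the normal xmit path. */
	if (skb->len < 20)
		return BPF_DROP;
	return BPF_OK;	/* bpf_xmit() maps this to LWTUNNEL_XMIT_CONTINUE */
}

char __license[] __section("license") = "GPL";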
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index e5f84c26ba1a..c23465005f2f 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -26,6 +26,7 @@
#include <net/lwtunnel.h>
#include <net/rtnetlink.h>
#include <net/ip6_fib.h>
+#include <net/nexthop.h>
#ifdef CONFIG_MODULES
@@ -39,6 +40,10 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
return "MPLS";
case LWTUNNEL_ENCAP_ILA:
return "ILA";
+ case LWTUNNEL_ENCAP_SEG6:
+ return "SEG6";
+ case LWTUNNEL_ENCAP_BPF:
+ return "BPF";
case LWTUNNEL_ENCAP_IP6:
case LWTUNNEL_ENCAP_IP:
case LWTUNNEL_ENCAP_NONE:
@@ -110,25 +115,91 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
+ if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
+ ret = ops->build_state(dev, encap, family, cfg, lws);
+ if (ret)
+ module_put(ops->owner);
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+int lwtunnel_valid_encap_type(u16 encap_type)
+{
+ const struct lwtunnel_encap_ops *ops;
+ int ret = -EINVAL;
+
+ if (encap_type == LWTUNNEL_ENCAP_NONE ||
+ encap_type > LWTUNNEL_ENCAP_MAX)
+ return ret;
+
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[encap_type]);
+ rcu_read_unlock();
#ifdef CONFIG_MODULES
if (!ops) {
const char *encap_type_str = lwtunnel_encap_str(encap_type);
if (encap_type_str) {
- rcu_read_unlock();
+ __rtnl_unlock();
request_module("rtnl-lwt-%s", encap_type_str);
+ rtnl_lock();
+
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
+ rcu_read_unlock();
}
}
#endif
- if (likely(ops && ops->build_state))
- ret = ops->build_state(dev, encap, family, cfg, lws);
- rcu_read_unlock();
+ return ops ? 0 : -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(lwtunnel_valid_encap_type);
- return ret;
+int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
+{
+ struct rtnexthop *rtnh = (struct rtnexthop *)attr;
+ struct nlattr *nla_entype;
+ struct nlattr *attrs;
+ struct nlattr *nla;
+ u16 encap_type;
+ int attrlen;
+
+ while (rtnh_ok(rtnh, remaining)) {
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ attrs = rtnh_attrs(rtnh);
+ nla = nla_find(attrs, attrlen, RTA_ENCAP);
+ nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+
+ if (nla_entype) {
+ encap_type = nla_get_u16(nla_entype);
+
+ if (lwtunnel_valid_encap_type(encap_type) != 0)
+ return -EOPNOTSUPP;
+ }
+ }
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ return 0;
}
-EXPORT_SYMBOL(lwtunnel_build_state);
+EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr);
+
+void lwtstate_free(struct lwtunnel_state *lws)
+{
+ const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];
+
+ if (ops->destroy_state) {
+ ops->destroy_state(lws);
+ kfree_rcu(lws, rcu);
+ } else {
+ kfree(lws);
+ }
+ module_put(ops->owner);
+}
+EXPORT_SYMBOL(lwtstate_free);
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
{
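
A hedged sketch of the intended caller of lwtunnel_valid_encap_type_attr(): an address family's route-add path validates multipath encap types up front, before any state is built. The fc_mp/fc_mp_len field names follow the IPv4 fib_config and are assumptions here:

	/* Validate RTA_ENCAP_TYPE attributes in a multipath route request
	 * before committing to building lwtunnel state (sketch). */
	if (cfg->fc_mp) {
		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len);
		if (err < 0)
			return err;
	}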
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2ae929f9bd06..7bb12e07ffef 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -100,6 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
neigh->parms->neigh_cleanup(neigh);
__neigh_notify(neigh, RTM_DELNEIGH, 0);
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
}
@@ -2291,13 +2292,10 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
n != NULL;
n = rcu_dereference_bh(n->next)) {
- if (!net_eq(dev_net(n->dev), net))
- continue;
- if (neigh_ifindex_filtered(n->dev, filter_idx))
- continue;
- if (neigh_master_filtered(n->dev, filter_master_idx))
- continue;
- if (idx < s_idx)
+ if (idx < s_idx || !net_eq(dev_net(n->dev), net))
+ goto next;
+ if (neigh_ifindex_filtered(n->dev, filter_idx) ||
+ neigh_master_filtered(n->dev, filter_master_idx))
goto next;
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
@@ -2332,9 +2330,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (h > s_h)
s_idx = 0;
for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
- if (pneigh_net(n) != net)
- continue;
- if (idx < s_idx)
+ if (idx < s_idx || pneigh_net(n) != net)
goto next;
if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 6e4f34721080..b0c04cf4851d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -950,10 +950,13 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
}
while (--i >= new_num) {
+ struct kobject *kobj = &dev->_rx[i].kobj;
+
+ if (!list_empty(&dev_net(dev)->exit_list))
+ kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
- sysfs_remove_group(&dev->_rx[i].kobj,
- dev->sysfs_rx_queue_group);
- kobject_put(&dev->_rx[i].kobj);
+ sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
+ kobject_put(kobj);
}
return error;
@@ -1021,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
return sprintf(buf, "%lu", trans_timeout);
}
-#ifdef CONFIG_XPS
static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
@@ -1033,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
return i;
}
+static ssize_t show_traffic_class(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ char *buf)
+{
+ struct net_device *dev = queue->dev;
+ int index = get_netdev_queue_index(queue);
+ int tc = netdev_txq_to_tc(dev, index);
+
+ if (tc < 0)
+ return -EINVAL;
+
+ return sprintf(buf, "%u\n", tc);
+}
+
+#ifdef CONFIG_XPS
static ssize_t show_tx_maxrate(struct netdev_queue *queue,
struct netdev_queue_attribute *attribute,
char *buf)
@@ -1075,6 +1092,9 @@ static struct netdev_queue_attribute queue_tx_maxrate =
static struct netdev_queue_attribute queue_trans_timeout =
__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+static struct netdev_queue_attribute queue_traffic_class =
+ __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
+
#ifdef CONFIG_BQL
/*
* Byte queue limits sysfs structures and functions.
@@ -1190,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
struct netdev_queue_attribute *attribute, char *buf)
{
struct net_device *dev = queue->dev;
+ int cpu, len, num_tc = 1, tc = 0;
struct xps_dev_maps *dev_maps;
cpumask_var_t mask;
unsigned long index;
- int i, len;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
index = get_netdev_queue_index(queue);
+ if (dev->num_tc) {
+ num_tc = dev->num_tc;
+ tc = netdev_txq_to_tc(dev, index);
+ if (tc < 0)
+ return -EINVAL;
+ }
+
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps);
if (dev_maps) {
- for_each_possible_cpu(i) {
- struct xps_map *map =
- rcu_dereference(dev_maps->cpu_map[i]);
- if (map) {
- int j;
- for (j = 0; j < map->len; j++) {
- if (map->queues[j] == index) {
- cpumask_set_cpu(i, mask);
- break;
- }
+ for_each_possible_cpu(cpu) {
+ int i, tci = cpu * num_tc + tc;
+ struct xps_map *map;
+
+ map = rcu_dereference(dev_maps->cpu_map[tci]);
+ if (!map)
+ continue;
+
+ for (i = map->len; i--;) {
+ if (map->queues[i] == index) {
+ cpumask_set_cpu(cpu, mask);
+ break;
}
}
}
@@ -1260,6 +1289,7 @@ static struct netdev_queue_attribute xps_cpus_attribute =
static struct attribute *netdev_queue_default_attrs[] = {
&queue_trans_timeout.attr,
+ &queue_traffic_class.attr,
#ifdef CONFIG_XPS
&xps_cpus_attribute.attr,
&queue_tx_maxrate.attr,
@@ -1340,6 +1370,8 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct netdev_queue *queue = dev->_tx + i;
+ if (!list_empty(&dev_net(dev)->exit_list))
+ queue->kobj.uevent_suppress = 1;
#ifdef CONFIG_BQL
sysfs_remove_group(&queue->kobj, &dql_group);
#endif
@@ -1525,6 +1557,9 @@ void netdev_unregister_kobject(struct net_device *ndev)
{
struct device *dev = &(ndev->dev);
+ if (!list_empty(&dev_net(ndev)->exit_list))
+ dev_set_uevent_suppress(dev, 1);
+
kobject_get(&dev->kobj);
remove_queue_kobjects(ndev);
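
The new traffic_class attribute lands under each TX queue's kobject; a small user-space sketch of reading it (the interface and queue names are examples):

/* Read which traffic class tx-0 of eth0 is mapped to (sketch). */
#include <stdio.h>

int main(void)
{
	char buf[16];
	FILE *f = fopen("/sys/class/net/eth0/queues/tx-0/traffic_class", "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("tx-0 -> tc %s", buf);
	if (f)
		fclose(f);
	return 0;
}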
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7001da910c6b..3c4bbec39713 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -39,6 +39,9 @@ EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
+#define MIN_PERNET_OPS_ID \
+ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
+
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
@@ -46,27 +49,28 @@ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
static struct net_generic *net_alloc_generic(void)
{
struct net_generic *ng;
- size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+ unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
ng = kzalloc(generic_size, GFP_KERNEL);
if (ng)
- ng->len = max_gen_ptrs;
+ ng->s.len = max_gen_ptrs;
return ng;
}
-static int net_assign_generic(struct net *net, int id, void *data)
+static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
struct net_generic *ng, *old_ng;
BUG_ON(!mutex_is_locked(&net_mutex));
- BUG_ON(id == 0);
+ BUG_ON(id < MIN_PERNET_OPS_ID);
old_ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&net_mutex));
- ng = old_ng;
- if (old_ng->len >= id)
- goto assign;
+ if (old_ng->s.len > id) {
+ old_ng->ptr[id] = data;
+ return 0;
+ }
ng = net_alloc_generic();
if (ng == NULL)
@@ -83,12 +87,12 @@ static int net_assign_generic(struct net *net, int id, void *data)
* the old copy for kfree after a grace period.
*/
- memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
+ memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
+ (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
+ ng->ptr[id] = data;
rcu_assign_pointer(net->gen, ng);
- kfree_rcu(old_ng, rcu);
-assign:
- ng->ptr[id - 1] = data;
+ kfree_rcu(old_ng, s.rcu);
return 0;
}
@@ -122,8 +126,7 @@ out:
static void ops_free(const struct pernet_operations *ops, struct net *net)
{
if (ops->id && ops->size) {
- int id = *ops->id;
- kfree(net_generic(net, id));
+ kfree(net_generic(net, *ops->id));
}
}
@@ -215,16 +218,15 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
*/
int peernet2id_alloc(struct net *net, struct net *peer)
{
- unsigned long flags;
bool alloc;
int id;
if (atomic_read(&net->count) == 0)
return NETNSA_NSID_NOT_ASSIGNED;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
alloc = atomic_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
if (alloc && id >= 0)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
@@ -233,12 +235,11 @@ int peernet2id_alloc(struct net *net, struct net *peer)
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
- unsigned long flags;
int id;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
id = __peernet2id(net, peer);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
return id;
}
EXPORT_SYMBOL(peernet2id);
@@ -253,18 +254,17 @@ bool peernet_has_id(struct net *net, struct net *peer)
struct net *get_net_ns_by_id(struct net *net, int id)
{
- unsigned long flags;
struct net *peer;
if (id < 0)
return NULL;
rcu_read_lock();
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
peer = idr_find(&net->netns_ids, id);
if (peer)
get_net(peer);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
rcu_read_unlock();
return peer;
@@ -384,7 +384,14 @@ struct net *copy_net_ns(unsigned long flags,
get_user_ns(user_ns);
- mutex_lock(&net_mutex);
+ rv = mutex_lock_killable(&net_mutex);
+ if (rv < 0) {
+ net_free(net);
+ dec_net_namespaces(ucounts);
+ put_user_ns(user_ns);
+ return ERR_PTR(rv);
+ }
+
net->ucounts = ucounts;
rv = setup_net(net, user_ns);
if (rv == 0) {
@@ -427,17 +434,17 @@ static void cleanup_net(struct work_struct *work)
for_each_net(tmp) {
int id;
- spin_lock_irq(&tmp->nsid_lock);
+ spin_lock_bh(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_irq(&tmp->nsid_lock);
+ spin_unlock_bh(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id);
}
- spin_lock_irq(&net->nsid_lock);
+ spin_lock_bh(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_irq(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
}
rtnl_unlock();
@@ -566,7 +573,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
- unsigned long flags;
struct net *peer;
int nsid, err;
@@ -587,15 +593,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
if (IS_ERR(peer))
return PTR_ERR(peer);
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
err = -EEXIST;
goto out;
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err);
err = 0;
@@ -717,11 +723,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
.idx = 0,
.s_idx = cb->args[0],
};
- unsigned long flags;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
cb->args[0] = net_cb.idx;
return skb->len;
@@ -868,7 +873,7 @@ static int register_pernet_operations(struct list_head *list,
if (ops->id) {
again:
- error = ida_get_new_above(&net_generic_ids, 1, ops->id);
+ error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id);
if (error < 0) {
if (error == -EAGAIN) {
ida_pre_get(&net_generic_ids, GFP_KERNEL);
@@ -876,7 +881,7 @@ again:
}
return error;
}
- max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
+ max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
}
error = __register_pernet_operations(list, ops);
if (error) {
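
With ids now allocated from MIN_PERNET_OPS_ID and used as direct array indexes, a typical subsystem registration still looks like this (kernel-side sketch; names are illustrative):

static unsigned int foo_net_id __read_mostly;

struct foo_net {
	int counter;		/* per-namespace state */
};

static struct pernet_operations foo_net_ops = {
	.id   = &foo_net_id,
	.size = sizeof(struct foo_net),
};

/* After register_pernet_subsys(&foo_net_ops), each namespace's state is
 * reached with net_generic(net, foo_net_id) — no more "id - 1" fixup. */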
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 53599bd0c82d..9424673009c1 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -171,12 +171,12 @@ static void poll_one_napi(struct napi_struct *napi)
static void poll_napi(struct net_device *dev)
{
struct napi_struct *napi;
+ int cpu = smp_processor_id();
list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (napi->poll_owner != smp_processor_id() &&
- spin_trylock(&napi->poll_lock)) {
+ if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) {
poll_one_napi(napi);
- spin_unlock(&napi->poll_lock);
+ smp_store_release(&napi->poll_owner, -1);
}
}
}
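
The spinlock is replaced by an ownership word: claim it with cmpxchg(), release it with a store-release. The same pattern in isolation (sketch):

	/* -1 means unowned; a CPU id means that CPU is polling (sketch). */
	int owner = -1;

	if (cmpxchg(&owner, -1, smp_processor_id()) == -1) {
		/* ... exclusive polling section ... */
		smp_store_release(&owner, -1);	/* publish work, then release */
	}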
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 306b8f0e03c1..8e69ce472236 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -413,7 +413,7 @@ struct pktgen_hdr {
};
-static int pg_net_id __read_mostly;
+static unsigned int pg_net_id __read_mostly;
struct pktgen_net {
struct net *net;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a6196cf844f6..75e3ea7bda08 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -40,7 +40,7 @@
#include <linux/pci.h>
#include <linux/etherdevice.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
@@ -1505,6 +1505,7 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
[IFLA_XDP_FD] = { .type = NLA_S32 },
[IFLA_XDP_ATTACHED] = { .type = NLA_U8 },
+ [IFLA_XDP_FLAGS] = { .type = NLA_U32 },
};
static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
@@ -2164,6 +2165,7 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_XDP]) {
struct nlattr *xdp[IFLA_XDP_MAX + 1];
+ u32 xdp_flags = 0;
err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
ifla_xdp_policy);
@@ -2174,9 +2176,19 @@ static int do_setlink(const struct sk_buff *skb,
err = -EINVAL;
goto errout;
}
+
+ if (xdp[IFLA_XDP_FLAGS]) {
+ xdp_flags = nla_get_u32(xdp[IFLA_XDP_FLAGS]);
+ if (xdp_flags & ~XDP_FLAGS_MASK) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+
if (xdp[IFLA_XDP_FD]) {
err = dev_change_xdp_fd(dev,
- nla_get_s32(xdp[IFLA_XDP_FD]));
+ nla_get_s32(xdp[IFLA_XDP_FD]),
+ xdp_flags);
if (err)
goto errout;
status |= DO_SETLINK_NOTIFY;
@@ -3165,7 +3177,7 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
if (err)
goto out;
- nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
+ err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
out:
netif_addr_unlock_bh(dev);
return err;
@@ -3671,7 +3683,7 @@ static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
if (!size)
continue;
- if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+ if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
continue;
attr = nla_reserve_64bit(skb, attr_id, size,
@@ -3712,7 +3724,7 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev)
for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
- if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+ if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
continue;
size = rtnl_get_offload_stats_attr_size(attr_id);
nla_size += nla_total_size_64bit(size);
@@ -3886,6 +3898,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
u32 filter_mask;
int err;
+ if (nlmsg_len(nlh) < sizeof(*ifsm))
+ return -EINVAL;
+
ifsm = nlmsg_data(nlh);
if (ifsm->ifindex > 0)
dev = __dev_get_by_index(net, ifsm->ifindex);
@@ -3935,6 +3950,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->seq = net->dev_base_seq;
+ if (nlmsg_len(cb->nlh) < sizeof(*ifsm))
+ return -EINVAL;
+
ifsm = nlmsg_data(cb->nlh);
filter_mask = ifsm->filter_mask;
if (!filter_mask)
diff --git a/net/core/scm.c b/net/core/scm.c
index 2696aefdc148..d8820438ba37 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -29,7 +29,7 @@
#include <linux/nsproxy.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index fd3ce461fbe6..88a8e429fc3e 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -12,6 +12,7 @@
#include <net/secure_seq.h>
#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
+#include <net/tcp.h>
#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
@@ -40,8 +41,8 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
-__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport)
+u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
u32 secret[MD5_MESSAGE_BYTES / 4];
u32 hash[MD5_DIGEST_WORDS];
@@ -58,6 +59,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
md5_transform(hash, secret);
+ *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
return seq_scale(hash[0]);
}
EXPORT_SYMBOL(secure_tcpv6_sequence_number);
@@ -86,8 +88,8 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#ifdef CONFIG_INET
-__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport)
+u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
u32 hash[MD5_DIGEST_WORDS];
@@ -99,6 +101,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
md5_transform(hash, net_secret);
+ *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
return seq_scale(hash[0]);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e3e0087245b..734c71468b01 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -72,7 +72,7 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <trace/events/skb.h>
#include <linux/highmem.h>
#include <linux/capability.h>
@@ -354,7 +354,7 @@ EXPORT_SYMBOL(build_skb);
struct napi_alloc_cache {
struct page_frag_cache page;
- size_t skb_count;
+ unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -369,7 +369,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
local_irq_save(flags);
nc = this_cpu_ptr(&netdev_alloc_cache);
- data = __alloc_page_frag(nc, fragsz, gfp_mask);
+ data = page_frag_alloc(nc, fragsz, gfp_mask);
local_irq_restore(flags);
return data;
}
@@ -391,7 +391,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
+ return page_frag_alloc(&nc->page, fragsz, gfp_mask);
}
void *napi_alloc_frag(unsigned int fragsz)
@@ -441,7 +441,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
local_irq_save(flags);
nc = this_cpu_ptr(&netdev_alloc_cache);
- data = __alloc_page_frag(nc, len, gfp_mask);
+ data = page_frag_alloc(nc, len, gfp_mask);
pfmemalloc = nc->pfmemalloc;
local_irq_restore(flags);
@@ -505,7 +505,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- data = __alloc_page_frag(&nc->page, len, gfp_mask);
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
if (unlikely(!data))
return NULL;
@@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
struct skb_frag_struct *fragfrom, *fragto;
BUG_ON(shiftlen > skb->len);
- BUG_ON(skb_headlen(skb)); /* Would corrupt stream */
+
+ if (skb_headlen(skb))
+ return 0;
todo = shiftlen;
from = 0;
@@ -3712,21 +3714,29 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_queue_err_skb);
+static bool is_icmp_err_skb(const struct sk_buff *skb)
+{
+ return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+ SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
+}
+
struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
{
struct sk_buff_head *q = &sk->sk_error_queue;
- struct sk_buff *skb, *skb_next;
+ struct sk_buff *skb, *skb_next = NULL;
+ bool icmp_next = false;
unsigned long flags;
- int err = 0;
spin_lock_irqsave(&q->lock, flags);
skb = __skb_dequeue(q);
if (skb && (skb_next = skb_peek(q)))
- err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
+ icmp_next = is_icmp_err_skb(skb_next);
spin_unlock_irqrestore(&q->lock, flags);
- sk->sk_err = err;
- if (err)
+ if (is_icmp_err_skb(skb) && !icmp_next)
+ sk->sk_err = 0;
+
+ if (skb_next)
sk->sk_error_report(sk);
return skb;
@@ -3838,10 +3848,18 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!skb_may_tx_timestamp(sk, tsonly))
return;
- if (tsonly)
- skb = alloc_skb(0, GFP_ATOMIC);
- else
+ if (tsonly) {
+#ifdef CONFIG_INET
+ if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+ sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM)
+ skb = tcp_get_timestamping_opt_stats(sk);
+ else
+#endif
+ skb = alloc_skb(0, GFP_ATOMIC);
+ } else {
skb = skb_clone(orig_skb, GFP_ATOMIC);
+ }
if (!skb)
return;
@@ -4350,7 +4368,7 @@ EXPORT_SYMBOL(skb_try_coalesce);
*/
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
- skb->tstamp.tv64 = 0;
+ skb->tstamp = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
@@ -4913,3 +4931,35 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
return clone;
}
EXPORT_SYMBOL(pskb_extract);
+
+/**
+ * skb_condense - try to get rid of fragments/frag_list if possible
+ * @skb: buffer
+ *
+ * Can be used to save memory before skb is added to a busy queue.
+ *	If the packet has bytes in frags and enough tail room in skb->head,
+ *	pull all of them in, so that the frags can be freed right away and
+ *	truesize adjusted.
+ * Notes:
+ *	We do not reallocate skb->head and thus cannot fail.
+ *	Caller must re-evaluate skb->truesize if needed.
+ */
+void skb_condense(struct sk_buff *skb)
+{
+ if (skb->data_len) {
+ if (skb->data_len > skb->end - skb->tail ||
+ skb_cloned(skb))
+ return;
+
+ /* Nice, we can free page frag(s) right now */
+ __pskb_pull_tail(skb, skb->data_len);
+ }
+	/* At this point, skb->truesize might be overestimated:
+	 * the skb had fragments, and fragments do not report their
+	 * truesize. When their content was pulled into skb->head the
+	 * fragments were freed, but __pskb_pull_tail() could not
+	 * adjust skb->truesize accordingly, not knowing each frag's
+	 * truesize.
+	 */
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+}
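
A hedged sketch of the intended use: condense just before queueing to a busy socket, then account the (possibly reduced) truesize, per the kernel-doc note that callers must re-evaluate it. The helper below is illustrative, not part of this patch:

static void enqueue_condensed(struct sock *sk, struct sk_buff *skb)
{
	skb_condense(skb);		/* may shrink skb->truesize */
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	/* queue lock assumed held by the caller (sketch) */
	__skb_queue_tail(&sk->sk_receive_queue, skb);
}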
diff --git a/net/core/sock.c b/net/core/sock.c
index 00a074dbfe9b..4eca27dc5c94 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,7 +118,7 @@
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <net/protocol.h>
@@ -222,7 +222,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
"sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
- "sk_lock-AF_MAX"
+ "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -239,7 +239,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
"slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
- "slock-AF_MAX"
+ "slock-AF_QIPCRTR", "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -256,7 +256,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
"clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
- "clock-AF_MAX"
+ "clock-AF_QIPCRTR", "clock-AF_MAX"
};
/*
@@ -854,6 +854,13 @@ set_rcvbuf:
sk->sk_tskey = 0;
}
}
+
+ if (val & SOF_TIMESTAMPING_OPT_STATS &&
+ !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
+ ret = -EINVAL;
+ break;
+ }
+
sk->sk_tsflags = val;
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
@@ -2080,37 +2087,31 @@ void __sk_flush_backlog(struct sock *sk)
*/
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int rc;
- DEFINE_WAIT(wait);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
+ rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
return rc;
}
EXPORT_SYMBOL(sk_wait_data);
/**
- * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ * __sk_mem_raise_allocated - increase memory_allocated
* @sk: socket
* @size: memory size to allocate
+ * @amt: pages to allocate
* @kind: allocation type
*
- * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
- * rmem allocation. This function assumes that protocols which have
- * memory_pressure use sk_wmem_queued as write buffer accounting.
+ * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
*/
-int __sk_mem_schedule(struct sock *sk, int size, int kind)
+int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct proto *prot = sk->sk_prot;
- int amt = sk_mem_pages(size);
- long allocated;
-
- sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-
- allocated = sk_memory_allocated_add(sk, amt);
+ long allocated = sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
!mem_cgroup_charge_skmem(sk->sk_memcg, amt))
@@ -2171,9 +2172,6 @@ suppress_allocation:
trace_sock_exceed_buf_limit(sk, prot, allocated);
- /* Alas. Undo changes. */
- sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-
sk_memory_allocated_sub(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
@@ -2181,18 +2179,40 @@ suppress_allocation:
return 0;
}
+EXPORT_SYMBOL(__sk_mem_raise_allocated);
+
+/**
+ * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ * @sk: socket
+ * @size: memory size to allocate
+ * @kind: allocation type
+ *
+ * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
+ * rmem allocation. This function assumes that protocols which have
+ * memory_pressure use sk_wmem_queued as write buffer accounting.
+ */
+int __sk_mem_schedule(struct sock *sk, int size, int kind)
+{
+ int ret, amt = sk_mem_pages(size);
+
+ sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
+ ret = __sk_mem_raise_allocated(sk, size, amt, kind);
+ if (!ret)
+ sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
+ return ret;
+}
EXPORT_SYMBOL(__sk_mem_schedule);
/**
- * __sk_mem_reclaim - reclaim memory_allocated
+ * __sk_mem_reduce_allocated - reclaim memory_allocated
* @sk: socket
- * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
+ * @amount: number of quanta
+ *
+ * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
*/
-void __sk_mem_reclaim(struct sock *sk, int amount)
+void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
- amount >>= SK_MEM_QUANTUM_SHIFT;
sk_memory_allocated_sub(sk, amount);
- sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
@@ -2201,6 +2221,19 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
}
+EXPORT_SYMBOL(__sk_mem_reduce_allocated);
+
+/**
+ * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
+ * @sk: socket
+ * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
+ */
+void __sk_mem_reclaim(struct sock *sk, int amount)
+{
+ amount >>= SK_MEM_QUANTUM_SHIFT;
+ sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ __sk_mem_reduce_allocated(sk, amount);
+}
EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
@@ -2436,8 +2469,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_type = sock->type;
sk->sk_wq = sock->wq;
sock->sk = sk;
- } else
+ sk->sk_uid = SOCK_INODE(sock)->i_uid;
+ } else {
sk->sk_wq = NULL;
+ sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
+ }
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
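
The new sanity check means user space must pair OPT_STATS with OPT_TSONLY; a minimal setsockopt sketch:

#include <sys/socket.h>
#include <linux/net_tstamp.h>

int enable_tx_stats(int fd)
{
	/* SOF_TIMESTAMPING_OPT_STATS is only accepted together with
	 * SOF_TIMESTAMPING_OPT_TSONLY after this change. */
	int val = SOF_TIMESTAMPING_TX_SCHED |
		  SOF_TIMESTAMPING_OPT_TSONLY |
		  SOF_TIMESTAMPING_OPT_STATS;

	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
			  &val, sizeof(val));
}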
diff --git a/net/core/stream.c b/net/core/stream.c
index 1086c8b280a8..f575bcf64af2 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -53,8 +53,8 @@ void sk_stream_write_space(struct sock *sk)
*/
int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct task_struct *tsk = current;
- DEFINE_WAIT(wait);
int done;
do {
@@ -68,13 +68,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
if (signal_pending(tsk))
return sock_intr_errno(*timeo_p);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending++;
done = sk_wait_event(sk, timeo_p,
!sk->sk_err &&
!((1 << sk->sk_state) &
- ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
- finish_wait(sk_sleep(sk), &wait);
+ ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending--;
} while (!done);
return 0;
@@ -94,16 +94,16 @@ static inline int sk_stream_closing(struct sock *sk)
void sk_stream_wait_close(struct sock *sk, long timeout)
{
if (timeout) {
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+ add_wait_queue(sk_sleep(sk), &wait);
do {
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
- if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
+ if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk), &wait))
break;
} while (!signal_pending(current) && timeout);
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
}
EXPORT_SYMBOL(sk_stream_wait_close);
@@ -119,16 +119,16 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
long vm_wait = 0;
long current_timeo = *timeo_p;
bool noblock = (*timeo_p ? false : true);
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
if (sk_stream_memory_free(sk))
current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
+ add_wait_queue(sk_sleep(sk), &wait);
+
while (1) {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
if (!*timeo_p) {
@@ -147,7 +147,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
sk_wait_event(sk, &current_timeo, sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
(sk_stream_memory_free(sk) &&
- !vm_wait));
+ !vm_wait), &wait);
sk->sk_write_pending--;
if (vm_wait) {
@@ -161,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
*timeo_p = current_timeo;
}
out:
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
return err;
do_error:
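
All of these conversions follow one shape: queue the waiter once outside the loop instead of prepare_to_wait()/finish_wait() on every pass. A sketch of the pattern, not any specific call site (condition() is illustrative):

	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(sk_sleep(sk), &wait);
	/* sk_wait_event() now takes &wait so it can do the woken-style
	 * sleep itself, releasing and re-taking the socket lock around it. */
	rc = sk_wait_event(sk, &timeo, condition(sk), &wait);
	remove_wait_queue(sk_sleep(sk), &wait);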
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0df2aa652530..2a46e4009f62 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -79,10 +79,13 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
if (sock_table != orig_sock_table) {
rcu_assign_pointer(rps_sock_flow_table, sock_table);
- if (sock_table)
+ if (sock_table) {
static_key_slow_inc(&rps_needed);
+ static_key_slow_inc(&rfs_needed);
+ }
if (orig_sock_table) {
static_key_slow_dec(&rps_needed);
+ static_key_slow_dec(&rfs_needed);
synchronize_rcu();
vfree(orig_sock_table);
}
diff --git a/net/core/utils.c b/net/core/utils.c
index cf5622b9ccc4..6592d7bbed39 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,7 +31,7 @@
#include <net/net_ratelimit.h>
#include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
/*
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index edbe59d203ef..d859a5c36e70 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -590,13 +590,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (inet_csk_reqsk_queue_is_full(sk))
goto drop;
- /*
- * Accept backlog is full. If we have already queued enough
- * of warm entries in syn queue, drop request. It is better than
- * clogging syn queue with openreqs with exponentially increasing
- * timeout.
- */
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ if (sk_acceptq_is_full(sk))
goto drop;
req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 715e5d1dc107..c4e879c02186 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -227,7 +227,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -281,7 +281,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
- ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
+ ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return;
@@ -326,7 +326,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (inet_csk_reqsk_queue_is_full(sk))
goto drop;
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ if (sk_acceptq_is_full(sk))
goto drop;
req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 13d6b1a6e0fc..a90ed67027b0 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1718,7 +1718,7 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
* See if there is data ready to read, sleep if there isn't
*/
for(;;) {
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
if (sk->sk_err)
goto out;
@@ -1749,11 +1749,11 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
}
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
+ sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
skb_queue_walk_safe(queue, skb, n) {
@@ -1999,19 +1999,19 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
* size.
*/
if (dn_queue_too_long(scp, queue, flags)) {
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
if (flags & MSG_DONTWAIT) {
err = -EWOULDBLOCK;
goto out;
}
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo,
- !dn_queue_too_long(scp, queue, flags));
+ !dn_queue_too_long(scp, queue, flags), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
continue;
}
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index b2c26b081134..8fdd9f492b0e 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -42,7 +42,7 @@
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
@@ -201,7 +201,7 @@ static struct dn_dev_sysctl_table {
.extra1 = &min_t3,
.extra2 = &max_t3
},
- {0}
+ { }
},
};
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index a796fc7cbc35..7af0ba6157a1 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -31,7 +31,7 @@
#include <linux/timer.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 1540b506e3e0..232675480756 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -25,7 +25,7 @@
#include <linux/timer.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/route.h> /* RTF_xxx */
#include <net/neighbour.h>
#include <net/netlink.h>
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 5325b541c526..6c7da6c29bf0 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -22,7 +22,7 @@
#include <net/dst.h>
#include <net/flow.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/dn.h>
#include <net/dn_dev.h>
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 5fff951a0a49..da3862124545 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -394,9 +394,11 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
return err;
}
- err = dsa_cpu_port_ethtool_setup(dst->ds[0]);
- if (err)
- return err;
+ if (dst->ds[0]) {
+ err = dsa_cpu_port_ethtool_setup(dst->ds[0]);
+ if (err)
+ return err;
+ }
/* If we use a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point on get
@@ -433,7 +435,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
dsa_ds_unapply(dst, ds);
}
- dsa_cpu_port_ethtool_restore(dst->ds[0]);
+ if (dst->ds[0])
+ dsa_cpu_port_ethtool_restore(dst->ds[0]);
pr_info("DSA: tree %d unapplied\n", dst->tree);
dst->applied = false;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 30e2e21d7619..7d4596110851 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -641,7 +641,8 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
/* ethtool operations *******************************************************/
static int
-dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+dsa_slave_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
int err;
@@ -650,19 +651,20 @@ dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
if (p->phy != NULL) {
err = phy_read_status(p->phy);
if (err == 0)
- err = phy_ethtool_gset(p->phy, cmd);
+ err = phy_ethtool_ksettings_get(p->phy, cmd);
}
return err;
}
static int
-dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+dsa_slave_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
if (p->phy != NULL)
- return phy_ethtool_sset(p->phy, cmd);
+ return phy_ethtool_ksettings_set(p->phy, cmd);
return -EOPNOTSUPP;
}
@@ -990,8 +992,6 @@ void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
}
static const struct ethtool_ops dsa_slave_ethtool_ops = {
- .get_settings = dsa_slave_get_settings,
- .set_settings = dsa_slave_set_settings,
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
.get_regs = dsa_slave_get_regs,
@@ -1007,6 +1007,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_wol = dsa_slave_get_wol,
.set_eee = dsa_slave_set_eee,
.get_eee = dsa_slave_get_eee,
+ .get_link_ksettings = dsa_slave_get_link_ksettings,
+ .set_link_ksettings = dsa_slave_set_link_ksettings,
};
static const struct net_device_ops dsa_slave_netdev_ops = {
@@ -1103,10 +1105,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
/* Use already configured phy mode */
if (p->phy_interface == PHY_INTERFACE_MODE_NA)
p->phy_interface = p->phy->interface;
- phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
- p->phy_interface);
-
- return 0;
+ return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
+ p->phy_interface);
}
static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
@@ -1201,6 +1201,8 @@ int dsa_slave_suspend(struct net_device *slave_dev)
{
struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ netif_device_detach(slave_dev);
+
if (p->phy) {
phy_stop(p->phy);
p->old_pause = -1;
@@ -1250,6 +1252,8 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
+ slave_dev->min_mtu = 0;
+ slave_dev->max_mtu = ETH_MAX_MTU;
SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 02acfff36028..8c5a479681ca 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -62,6 +62,7 @@
#include <net/dsa.h>
#include <net/flow_dissector.h>
#include <linux/uaccess.h>
+#include <net/pkt_sched.h>
__setup("ether=", netdev_boot_setup);
@@ -322,8 +323,7 @@ EXPORT_SYMBOL(eth_mac_addr);
*/
int eth_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
- return -EINVAL;
+ netdev_warn(dev, "%s is deprecated\n", __func__);
dev->mtu = new_mtu;
return 0;
}
@@ -357,8 +357,10 @@ void ether_setup(struct net_device *dev)
dev->type = ARPHRD_ETHER;
dev->hard_header_len = ETH_HLEN;
dev->mtu = ETH_DATA_LEN;
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = ETH_DATA_LEN;
dev->addr_len = ETH_ALEN;
- dev->tx_queue_len = 1000; /* Ethernet wants good queues */
+ dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
dev->priv_flags |= IFF_TX_SKB_SHARING;
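
Drivers that support jumbo frames now widen the range after ether_setup() instead of overriding ndo_change_mtu (sketch; the 9000-byte ceiling is an example):

	ether_setup(netdev);
	/* ether_setup() defaults to [ETH_MIN_MTU, ETH_DATA_LEN]; a
	 * jumbo-capable driver simply raises the ceiling. */
	netdev->max_mtu = 9000;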
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 16737cd8dae8..fc65b145f6e7 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -398,6 +398,7 @@ void hsr_dev_setup(struct net_device *dev)
random_ether_addr(dev->dev_addr);
ether_setup(dev);
+ dev->min_mtu = 0;
dev->header_ops = &hsr_header_ops;
dev->netdev_ops = &hsr_device_ops;
SET_NETDEV_DEVTYPE(dev, &hsr_type);
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index d4d1617f43a8..1ab30e7d3f99 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -131,13 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
[HSR_A_IF2_SEQ] = { .type = NLA_U16 },
};
-static struct genl_family hsr_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = "HSR",
- .version = 1,
- .maxattr = HSR_A_MAX,
-};
+static struct genl_family hsr_genl_family;
static const struct genl_multicast_group hsr_mcgrps[] = {
{ .name = "hsr-network", },
@@ -467,6 +461,18 @@ static const struct genl_ops hsr_ops[] = {
},
};
+static struct genl_family hsr_genl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = "HSR",
+ .version = 1,
+ .maxattr = HSR_A_MAX,
+ .module = THIS_MODULE,
+ .ops = hsr_ops,
+ .n_ops = ARRAY_SIZE(hsr_ops),
+ .mcgrps = hsr_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(hsr_mcgrps),
+};
+
int __init hsr_netlink_init(void)
{
int rc;
@@ -475,8 +481,7 @@ int __init hsr_netlink_init(void)
if (rc)
goto fail_rtnl_link_register;
- rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops,
- hsr_mcgrps);
+ rc = genl_register_family(&hsr_genl_family);
if (rc)
goto fail_genl_register_family;
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index 5ac778962e4e..ac7c96b73ad5 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -7,7 +7,7 @@
#include <net/inet_frag.h>
#include <net/6lowpan.h>
-typedef unsigned __bitwise__ lowpan_rx_result;
+typedef unsigned __bitwise lowpan_rx_result;
#define RX_CONTINUE ((__force lowpan_rx_result) 0u)
#define RX_DROP_UNUSABLE ((__force lowpan_rx_result) 1u)
#define RX_DROP ((__force lowpan_rx_result) 2u)
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 4adfd4d5471b..9b92ade687a3 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -7,5 +7,3 @@ ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \
ieee802154_socket-y := socket.o
CFLAGS_trace.o := -I$(src)
-
-ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index c8133c07ceee..6bde9e5a5503 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -28,14 +28,6 @@
static unsigned int ieee802154_seq_num;
static DEFINE_SPINLOCK(ieee802154_seq_lock);
-struct genl_family nl802154_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = IEEE802154_NL_NAME,
- .version = 1,
- .maxattr = IEEE802154_ATTR_MAX,
-};
-
/* Requests to userspace */
struct sk_buff *ieee802154_nl_create(int flags, u8 req)
{
@@ -139,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = {
[IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, },
};
+struct genl_family nl802154_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = IEEE802154_NL_NAME,
+ .version = 1,
+ .maxattr = IEEE802154_ATTR_MAX,
+ .module = THIS_MODULE,
+ .ops = ieee8021154_ops,
+ .n_ops = ARRAY_SIZE(ieee8021154_ops),
+ .mcgrps = ieee802154_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
+};
+
int __init ieee802154_nl_init(void)
{
- return genl_register_family_with_ops_groups(&nl802154_family,
- ieee8021154_ops,
- ieee802154_mcgrps);
+ return genl_register_family(&nl802154_family);
}
void ieee802154_nl_exit(void)
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 77d73014bde3..dc2960be51e0 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -286,9 +286,12 @@ int ieee802154_del_iface(struct sk_buff *skb, struct genl_info *info)
if (name[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0')
return -EINVAL; /* name should be null-terminated */
+ rc = -ENODEV;
dev = dev_get_by_name(genl_info_net(info), name);
if (!dev)
- return -ENODEV;
+ return rc;
+ if (dev->type != ARPHRD_IEEE802154)
+ goto out;
phy = dev->ieee802154_ptr->wpan_phy;
BUG_ON(!phy);
@@ -342,6 +345,7 @@ nla_put_failure:
nlmsg_free(msg);
out_dev:
wpan_phy_put(phy);
+out:
if (dev)
dev_put(dev);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index d90a4ed5b8a0..fc60cd061f39 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -26,23 +26,8 @@
#include "rdev-ops.h"
#include "core.h"
-static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
- struct genl_info *info);
-
-static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
- struct genl_info *info);
-
/* the netlink family */
-static struct genl_family nl802154_fam = {
- .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
- .name = NL802154_GENL_NAME, /* have users key off the name instead */
- .hdrsize = 0, /* no private header */
- .version = 1, /* no particular meaning now */
- .maxattr = NL802154_ATTR_MAX,
- .netnsok = true,
- .pre_doit = nl802154_pre_doit,
- .post_doit = nl802154_post_doit,
-};
+static struct genl_family nl802154_fam;
/* multicast groups */
enum nl802154_multicast_groups {
@@ -263,13 +248,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
- nl802154_fam.attrbuf, nl802154_fam.maxattr,
+ genl_family_attrbuf(&nl802154_fam),
+ nl802154_fam.maxattr,
nl802154_policy);
if (err)
goto out_unlock;
*wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
- nl802154_fam.attrbuf);
+ genl_family_attrbuf(&nl802154_fam));
if (IS_ERR(*wpan_dev)) {
err = PTR_ERR(*wpan_dev);
goto out_unlock;
@@ -575,7 +561,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct netlink_callback *cb,
struct nl802154_dump_wpan_phy_state *state)
{
- struct nlattr **tb = nl802154_fam.attrbuf;
+ struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
tb, nl802154_fam.maxattr, nl802154_policy);
@@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = {
#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
};
+static struct genl_family nl802154_fam __ro_after_init = {
+ .name = NL802154_GENL_NAME, /* have users key off the name instead */
+ .hdrsize = 0, /* no private header */
+ .version = 1, /* no particular meaning now */
+ .maxattr = NL802154_ATTR_MAX,
+ .netnsok = true,
+ .pre_doit = nl802154_pre_doit,
+ .post_doit = nl802154_post_doit,
+ .module = THIS_MODULE,
+ .ops = nl802154_ops,
+ .n_ops = ARRAY_SIZE(nl802154_ops),
+ .mcgrps = nl802154_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps),
+};
+
/* initialisation/exit functions */
-int nl802154_init(void)
+int __init nl802154_init(void)
{
- return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops,
- nl802154_mcgrps);
+ return genl_register_family(&nl802154_fam);
}
void nl802154_exit(void)
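
Since the family object is now read-only after init, dump handlers no longer reach into a writable fam.attrbuf member directly; they fetch the core-allocated parse buffer through genl_family_attrbuf(), as the two hunks above show. A condensed sketch of such a dump-side parse (demo_dump_start is hypothetical; the helper is only safe for families without parallel_ops, since the buffer is shared):

	static int demo_dump_start(struct netlink_callback *cb)
	{
		struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);

		return nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
				   tb, nl802154_fam.maxattr, nl802154_policy);
	}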
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b54b3ca939db..6e7baaf814c6 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -430,6 +430,14 @@ config INET_UDP_DIAG
Support for UDP socket monitoring interface used by the ss tool.
If unsure, say Y.
+config INET_RAW_DIAG
+ tristate "RAW: socket monitoring interface"
+ depends on INET_DIAG && (IPV6 || IPV6=n)
+ default n
+ ---help---
+ Support for RAW socket monitoring interface used by the ss tool.
+ If unsure, say Y.
+
config INET_DIAG_DESTROY
bool "INET: allow privileged process to administratively close sockets"
depends on INET_DIAG
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bc6a6c8b9bcd..48af58a5686e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
obj-$(CONFIG_INET_DIAG) += inet_diag.o
obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
+obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 215143246e4b..f75069883f2b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -90,7 +90,7 @@
#include <linux/random.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/igmp.h>
@@ -374,8 +374,18 @@ lookup_protocol:
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err)
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
sk_common_release(sk);
+ goto out;
+ }
}
out:
return err;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 062a67ca9a21..4cd2ee8857d2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -26,7 +26,7 @@
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/module.h>
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 161fc0f0d752..7db2ad2e82d3 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -14,7 +14,7 @@
*/
#include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/types.h>
@@ -46,6 +46,7 @@
#include <net/rtnetlink.h>
#include <net/xfrm.h>
#include <net/l3mdev.h>
+#include <net/lwtunnel.h>
#include <trace/events/fib.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -85,7 +86,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
if (tb)
return tb;
- if (id == RT_TABLE_LOCAL)
+ if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules)
alias = fib_new_table(net, RT_TABLE_MAIN);
tb = fib_trie_table(id, alias);
@@ -620,6 +621,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_FLOW] = { .type = NLA_U32 },
[RTA_ENCAP_TYPE] = { .type = NLA_U16 },
[RTA_ENCAP] = { .type = NLA_NESTED },
+ [RTA_UID] = { .type = NLA_U32 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -676,6 +678,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
cfg->fc_mx_len = nla_len(attr);
break;
case RTA_MULTIPATH:
+ err = lwtunnel_valid_encap_type_attr(nla_data(attr),
+ nla_len(attr));
+ if (err < 0)
+ goto errout;
cfg->fc_mp = nla_data(attr);
cfg->fc_mp_len = nla_len(attr);
break;
@@ -690,6 +696,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
break;
case RTA_ENCAP_TYPE:
cfg->fc_encap_type = nla_get_u16(attr);
+ err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+ if (err < 0)
+ goto errout;
break;
}
}
@@ -1218,6 +1227,8 @@ static int __net_init ip_fib_net_init(struct net *net)
int err;
size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
+ net->ipv4.fib_seq = 0;
+
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 388d3e21629b..9a375b908d01 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -13,7 +13,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -234,6 +234,7 @@ void free_fib_info(struct fib_info *fi)
#endif
call_rcu(&fi->rcu, free_fib_info_rcu);
}
+EXPORT_SYMBOL_GPL(free_fib_info);
void fib_release_info(struct fib_info *fi)
{
@@ -1278,8 +1279,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
goto nla_put_failure;
#endif
- if (fi->fib_nh->nh_lwtstate)
- lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate);
+ if (fi->fib_nh->nh_lwtstate &&
+ lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
+ goto nla_put_failure;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) {
@@ -1315,8 +1317,10 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
goto nla_put_failure;
#endif
- if (nh->nh_lwtstate)
- lwtunnel_fill_encap(skb, nh->nh_lwtstate);
+ if (nh->nh_lwtstate &&
+ lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
+ goto nla_put_failure;
+
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
} endfor_nexthops(fi);
@@ -1617,8 +1621,13 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, int mp_hash)
{
+ bool oif_check;
+
+ oif_check = (fl4->flowi4_oif == 0 ||
+ fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
+ if (res->fi->fib_nhs > 1 && oif_check) {
if (mp_hash < 0)
mp_hash = get_hash_from_flowi4(fl4) >> 1;
@@ -1628,7 +1637,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
#endif
if (!res->prefixlen &&
res->table->tb_num_default > 1 &&
- res->type == RTN_UNICAST && !fl4->flowi4_oif)
+ res->type == RTN_UNICAST && oif_check)
fib_select_default(fl4, res);
if (!fl4->saddr)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e3665bf7a7f3..2919d1a10cfd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,7 +50,7 @@
#define VERSION "0.409"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -84,25 +84,114 @@
#include <trace/events/fib.h>
#include "fib_lookup.h"
-static BLOCKING_NOTIFIER_HEAD(fib_chain);
+static unsigned int fib_seq_sum(void)
+{
+ unsigned int fib_seq = 0;
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net)
+ fib_seq += net->ipv4.fib_seq;
+ rtnl_unlock();
+
+ return fib_seq;
+}
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
-int register_fib_notifier(struct notifier_block *nb)
+static int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
- return blocking_notifier_chain_register(&fib_chain, nb);
+ info->net = net;
+ return nb->notifier_call(nb, event_type, info);
+}
+
+static void fib_rules_notify(struct net *net, struct notifier_block *nb,
+ enum fib_event_type event_type)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ struct fib_notifier_info info;
+
+ if (net->ipv4.fib_has_custom_rules)
+ call_fib_notifier(nb, net, event_type, &info);
+#endif
+}
+
+static void fib_notify(struct net *net, struct notifier_block *nb,
+ enum fib_event_type event_type);
+
+static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type, u32 dst,
+ int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id, u32 nlflags)
+{
+ struct fib_entry_notifier_info info = {
+ .dst = dst,
+ .dst_len = dst_len,
+ .fi = fi,
+ .tos = tos,
+ .type = type,
+ .tb_id = tb_id,
+ .nlflags = nlflags,
+ };
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ unsigned int fib_seq)
+{
+ atomic_notifier_chain_register(&fib_chain, nb);
+ if (fib_seq == fib_seq_sum())
+ return true;
+ atomic_notifier_chain_unregister(&fib_chain, nb);
+ if (cb)
+ cb(nb);
+ return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb))
+{
+ int retries = 0;
+
+ do {
+ unsigned int fib_seq = fib_seq_sum();
+ struct net *net;
+
+ /* Mutex semantics guarantee that every change done to
+ * FIB tries before we read the change sequence counter
+ * is now visible to us.
+ */
+ rcu_read_lock();
+ for_each_net_rcu(net) {
+ fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
+ fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
+ }
+ rcu_read_unlock();
+
+ if (fib_dump_is_consistent(nb, cb, fib_seq))
+ return 0;
+ } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+ return -EBUSY;
}
EXPORT_SYMBOL(register_fib_notifier);
int unregister_fib_notifier(struct notifier_block *nb)
{
- return blocking_notifier_chain_unregister(&fib_chain, nb);
+ return atomic_notifier_chain_unregister(&fib_chain, nb);
}
EXPORT_SYMBOL(unregister_fib_notifier);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ net->ipv4.fib_seq++;
info->net = net;
- return blocking_notifier_call_chain(&fib_chain, event_type, info);
+ return atomic_notifier_call_chain(&fib_chain, event_type, info);
}
static int call_fib_entry_notifiers(struct net *net,
@@ -1903,6 +1992,62 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
return found;
}
+static void fib_leaf_notify(struct net *net, struct key_vector *l,
+ struct fib_table *tb, struct notifier_block *nb,
+ enum fib_event_type event_type)
+{
+ struct fib_alias *fa;
+
+ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+
+ if (!fi)
+ continue;
+
+ /* local and main table can share the same trie,
+ * so don't notify twice for the same entry.
+ */
+ if (tb->tb_id != fa->tb_id)
+ continue;
+
+ call_fib_entry_notifier(nb, net, event_type, l->key,
+ KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
+ fa->fa_type, fa->tb_id, 0);
+ }
+}
+
+static void fib_table_notify(struct net *net, struct fib_table *tb,
+ struct notifier_block *nb,
+ enum fib_event_type event_type)
+{
+ struct trie *t = (struct trie *)tb->tb_data;
+ struct key_vector *l, *tp = t->kv;
+ t_key key = 0;
+
+ while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
+ fib_leaf_notify(net, l, tb, nb, event_type);
+
+ key = l->key + 1;
+ /* stop in case of wrap around */
+ if (key < l->key)
+ break;
+ }
+}
+
+static void fib_notify(struct net *net, struct notifier_block *nb,
+ enum fib_event_type event_type)
+{
+ unsigned int h;
+
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+ struct fib_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb_hlist)
+ fib_table_notify(net, tb, nb, event_type);
+ }
+}
+
static void __trie_free_rcu(struct rcu_head *head)
{
struct fib_table *tb = container_of(head, struct fib_table, rcu);
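
The retry loop above gives a newly registered listener a consistent replay of the current rules and entries. A consumer would look roughly like the following (the demo_* names are hypothetical; fib_entry_notifier_info embeds the generic info as a member, hence the container_of):

	static int demo_fib_event(struct notifier_block *nb,
				  unsigned long event, void *ptr)
	{
		struct fib_notifier_info *info = ptr;
		struct fib_entry_notifier_info *fen_info;

		switch (event) {
		case FIB_EVENT_ENTRY_ADD:
		case FIB_EVENT_ENTRY_DEL:
			fen_info = container_of(info,
						struct fib_entry_notifier_info,
						info);
			/* mirror fen_info->dst/dst_len/fi into HW, etc. */
			break;
		}
		return NOTIFY_DONE;
	}

	static void demo_fib_dump_flush(struct notifier_block *nb)
	{
		/* dump raced with FIB changes: drop any partial state */
	}

	static struct notifier_block demo_fib_nb = {
		.notifier_call = demo_fib_event,
	};

	/* at init: replays current state, -EBUSY after too many races */
	err = register_fib_notifier(&demo_fib_nb, demo_fib_dump_flush);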
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 030d1531e897..805f6607f8d9 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -622,14 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
return err;
}
-static struct genl_family fou_nl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = FOU_GENL_NAME,
- .version = FOU_GENL_VERSION,
- .maxattr = FOU_ATTR_MAX,
- .netnsok = true,
-};
+static struct genl_family fou_nl_family;
static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
[FOU_ATTR_PORT] = { .type = NLA_U16, },
@@ -831,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = {
},
};
+static struct genl_family fou_nl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = FOU_GENL_NAME,
+ .version = FOU_GENL_VERSION,
+ .maxattr = FOU_ATTR_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = fou_nl_ops,
+ .n_ops = ARRAY_SIZE(fou_nl_ops),
+};
+
size_t fou_encap_hlen(struct ip_tunnel_encap *e)
{
return sizeof(struct udphdr);
@@ -1086,8 +1090,7 @@ static int __init fou_init(void)
if (ret)
goto exit;
- ret = genl_register_family_with_ops(&fou_nl_family,
- fou_nl_ops);
+ ret = genl_register_family(&fou_nl_family);
if (ret < 0)
goto unregister;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 48734ee6293f..0777ea949223 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -91,7 +91,7 @@
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
@@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_mark = mark;
+ fl4.flowi4_uid = sock_net_uid(net, NULL);
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
@@ -473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
param->replyopts.opt.opt.faddr : iph->saddr);
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
+ fl4->flowi4_uid = sock_net_uid(net, NULL);
fl4->flowi4_tos = RT_TOS(tos);
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
@@ -1045,12 +1047,12 @@ int icmp_rcv(struct sk_buff *skb)
if (success) {
consume_skb(skb);
- return 0;
+ return NET_RX_SUCCESS;
}
drop:
kfree_skb(skb);
- return 0;
+ return NET_RX_DROP;
csum_error:
__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 15db786d50ed..5b15459955f8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -72,7 +72,7 @@
#include <linux/module.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
@@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
static void igmp_gq_start_timer(struct in_device *in_dev)
{
int tv = prandom_u32() % in_dev->mr_maxdelay;
+ unsigned long exp = jiffies + tv + 2;
+
+ if (in_dev->mr_gq_running &&
+ time_after_eq(exp, (in_dev->mr_gq_timer).expires))
+ return;
in_dev->mr_gq_running = 1;
- if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2))
+ if (!mod_timer(&in_dev->mr_gq_timer, exp))
in_dev_hold(in_dev);
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 61a9deec2993..19ea045c50ed 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -45,11 +45,12 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
EXPORT_SYMBOL(inet_get_local_port_range);
int inet_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax)
+ const struct inet_bind_bucket *tb, bool relax,
+ bool reuseport_ok)
{
struct sock *sk2;
- int reuse = sk->sk_reuse;
- int reuseport = sk->sk_reuseport;
+ bool reuse = sk->sk_reuse;
+ bool reuseport = !!sk->sk_reuseport && reuseport_ok;
kuid_t uid = sock_i_uid((struct sock *)sk);
/*
@@ -105,6 +106,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
struct inet_bind_bucket *tb;
kuid_t uid = sock_i_uid(sk);
u32 remaining, offset;
+ bool reuseport_ok = !!snum;
if (port) {
have_port:
@@ -165,7 +167,8 @@ other_parity_scan:
smallest_size = tb->num_owners;
smallest_port = port;
}
- if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))
+ if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false,
+ reuseport_ok))
goto tb_found;
goto next_port;
}
@@ -206,13 +209,14 @@ tb_found:
sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
smallest_size == -1)
goto success;
- if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
+ if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true,
+ reuseport_ok)) {
if ((reuse ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(tb->fastuid, uid))) &&
- smallest_size != -1 && --attempts >= 0) {
+ !snum && smallest_size != -1 && --attempts >= 0) {
spin_unlock_bh(&head->lock);
goto again;
}
@@ -415,7 +419,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
- htons(ireq->ir_num));
+ htons(ireq->ir_num), sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -452,7 +456,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
- htons(ireq->ir_num));
+ htons(ireq->ir_num), sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
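
The new reuseport_ok flag exists because inet_csk_get_port() may be asked to pick an ephemeral port (snum == 0): during that search, SO_REUSEPORT must not count as resolving a conflict, or two such sockets could silently end up sharing one auto-selected port. In user-space terms (a sketch, error handling elided):

	int one = 1;
	struct sockaddr_in a = { .sin_family = AF_INET };	/* sin_port = 0 */

	setsockopt(s1, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
	setsockopt(s2, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
	bind(s1, (struct sockaddr *)&a, sizeof(a));
	bind(s2, (struct sockaddr *)&a, sizeof(a));
	/* with the fix, s1 and s2 get distinct ephemeral ports */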
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e4d16fc5bbb3..4dea33e5f295 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -200,6 +200,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
goto errout;
+ /*
+ * RAW sockets might have user-defined protocols assigned,
+ * so report the one supplied on socket creation.
+ */
+ if (sk->sk_type == SOCK_RAW) {
+ if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
+ goto errout;
+ }
+
if (!icsk) {
handler->idiag_get_info(sk, r, NULL);
goto out;
@@ -852,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
+ bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
struct net *net = sock_net(skb->sk);
- int i, num, s_i, s_num;
u32 idiag_states = r->idiag_states;
- bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ int i, num, s_i, s_num;
+ struct sock *sk;
if (idiag_states & TCPF_SYN_RECV)
idiag_states |= TCPF_NEW_SYN_RECV;
@@ -863,16 +873,15 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
s_num = num = cb->args[2];
if (cb->args[0] == 0) {
- if (!(idiag_states & TCPF_LISTEN))
+ if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
goto skip_listen_ht;
for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
struct inet_listen_hashbucket *ilb;
- struct sock *sk;
num = 0;
ilb = &hashinfo->listening_hash[i];
- spin_lock_bh(&ilb->lock);
+ spin_lock(&ilb->lock);
sk_for_each(sk, &ilb->head) {
struct inet_sock *inet = inet_sk(sk);
@@ -892,26 +901,18 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
r->id.idiag_sport)
goto next_listen;
- if (r->id.idiag_dport ||
- cb->args[3] > 0)
- goto next_listen;
-
if (inet_csk_diag_dump(sk, skb, cb, r,
bc, net_admin) < 0) {
- spin_unlock_bh(&ilb->lock);
+ spin_unlock(&ilb->lock);
goto done;
}
next_listen:
- cb->args[3] = 0;
- cb->args[4] = 0;
++num;
}
- spin_unlock_bh(&ilb->lock);
+ spin_unlock(&ilb->lock);
s_num = 0;
- cb->args[3] = 0;
- cb->args[4] = 0;
}
skip_listen_ht:
cb->args[0] = 1;
@@ -921,13 +922,14 @@ skip_listen_ht:
if (!(idiag_states & ~TCPF_LISTEN))
goto out;
+#define SKARR_SZ 16
for (i = s_i; i <= hashinfo->ehash_mask; i++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[i];
spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
struct hlist_nulls_node *node;
- struct sock *sk;
-
- num = 0;
+ struct sock *sk_arr[SKARR_SZ];
+ int num_arr[SKARR_SZ];
+ int idx, accum, res;
if (hlist_nulls_empty(&head->chain))
continue;
@@ -935,9 +937,12 @@ skip_listen_ht:
if (i > s_i)
s_num = 0;
+next_chunk:
+ num = 0;
+ accum = 0;
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &head->chain) {
- int state, res;
+ int state;
if (!net_eq(sock_net(sk), net))
continue;
@@ -961,21 +966,35 @@ skip_listen_ht:
if (!inet_diag_bc_sk(bc, sk))
goto next_normal;
- res = sk_diag_fill(sk, skb, r,
+ sock_hold(sk);
+ num_arr[accum] = num;
+ sk_arr[accum] = sk;
+ if (++accum == SKARR_SZ)
+ break;
+next_normal:
+ ++num;
+ }
+ spin_unlock_bh(lock);
+ res = 0;
+ for (idx = 0; idx < accum; idx++) {
+ if (res >= 0) {
+ res = sk_diag_fill(sk_arr[idx], skb, r,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->nlh, net_admin);
- if (res < 0) {
- spin_unlock_bh(lock);
- goto done;
+ if (res < 0)
+ num = num_arr[idx];
}
-next_normal:
- ++num;
+ sock_gen_put(sk_arr[idx]);
}
-
- spin_unlock_bh(lock);
+ if (res < 0)
+ break;
cond_resched();
+ if (accum == SKARR_SZ) {
+ s_num = num + 1;
+ goto next_chunk;
+ }
}
done:
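
The rewrite above replaces fill-under-lock with a two-phase walk: grab up to SKARR_SZ socket references under the bucket spinlock, then build the netlink messages (which may allocate and should not run for long with bottom halves disabled) after dropping it. Stripped to its core, the pattern is (wanted()/fill_one() are hypothetical placeholders):

	spin_lock_bh(lock);
	sk_nulls_for_each(sk, node, &head->chain) {
		if (!wanted(sk))
			continue;
		sock_hold(sk);
		sk_arr[accum] = sk;
		if (++accum == SKARR_SZ)
			break;		/* resume after releasing the lock */
	}
	spin_unlock_bh(lock);

	for (idx = 0; idx < accum; idx++) {
		if (res >= 0)
			res = fill_one(sk_arr[idx]);	/* may allocate */
		sock_gen_put(sk_arr[idx]);
	}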
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 576f705d8180..c9c1cb635d9a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -17,7 +17,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
@@ -113,8 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
-static int ipgre_net_id __read_mostly;
-static int gre_tap_net_id __read_mostly;
+static unsigned int ipgre_net_id __read_mostly;
+static unsigned int gre_tap_net_id __read_mostly;
static void ipgre_err(struct sk_buff *skb, u32 info,
const struct tnl_ptk_info *tpi)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4d158ff1def1..93157f2f4758 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -15,7 +15,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 877bdb02e887..b67719f45953 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -42,7 +42,7 @@
* Hirokazu Takahashi: sendfile() on UDP works now.
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -74,6 +74,7 @@
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <net/lwtunnel.h>
+#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
@@ -287,6 +288,13 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
@@ -305,6 +313,20 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return ip_finish_output2(net, sk, skb);
}
+static int ip_mc_finish_output(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ return dev_loopback_xmit(net, sk, skb);
+}
+
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
@@ -342,7 +364,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
- dev_loopback_xmit);
+ ip_mc_finish_output);
}
/* Multicasts with ttl 0 must not go beyond the host */
@@ -358,7 +380,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
- dev_loopback_xmit);
+ ip_mc_finish_output);
}
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
@@ -583,7 +605,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
*/
if (skb_has_frag_list(skb)) {
struct sk_buff *frag, *frag2;
- int first_len = skb_pagelen(skb);
+ unsigned int first_len = skb_pagelen(skb);
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
@@ -804,11 +826,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
struct msghdr *msg = from;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (copy_from_iter(to, len, &msg->msg_iter) != len)
+ if (!copy_from_iter_full(to, len, &msg->msg_iter))
return -EFAULT;
} else {
__wsum csum = 0;
- if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len)
+ if (!csum_and_copy_from_iter_full(to, len, &csum, &msg->msg_iter))
return -EFAULT;
skb->csum = csum_block_add(skb->csum, csum, odd);
}
@@ -936,7 +958,7 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if (((length > mtu) || (skb && skb_is_gso(skb))) &&
+ if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
(sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
@@ -1594,7 +1616,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
- tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
+ tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
+ arg->uid);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
@@ -1606,6 +1629,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
if (unlikely(err)) {
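
Both new call sites follow the same contract: a nonzero return from BPF_CGROUP_RUN_PROG_INET_EGRESS means the attached cgroup program rejected the packet, so it is freed and the error propagated. From user space, such a filter is attached roughly like this (assumes CONFIG_CGROUP_BPF and an already-loaded BPF_PROG_TYPE_CGROUP_SKB program; error handling elided):

	union bpf_attr attr = {};

	attr.target_fd	   = cgroup_fd;	/* fd of the cgroup v2 directory */
	attr.attach_bpf_fd = prog_fd;	/* loaded BPF_PROG_TYPE_CGROUP_SKB */
	attr.attach_type   = BPF_CGROUP_INET_EGRESS;
	err = syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));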
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b8a2d63d1fb8..53ae0c6315ad 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -44,7 +44,7 @@
#include <net/ip_fib.h>
#include <linux/errqueue.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
/*
* SOL_IP control messages.
@@ -97,6 +97,17 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
}
+static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
+{
+ int val;
+
+ if (IPCB(skb)->frag_max_size == 0)
+ return;
+
+ val = IPCB(skb)->frag_max_size;
+ put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
+}
+
static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
int tlen, int offset)
{
@@ -137,7 +148,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
__be16 *ports = (__be16 *)skb_transport_header(skb);
- if (skb_transport_offset(skb) + 4 > skb->len)
+ if (skb_transport_offset(skb) + 4 > (int)skb->len)
return;
/* All current transport protocols have the port numbers in the
@@ -153,10 +164,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
}
-void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
- int tlen, int offset)
+void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb, int tlen, int offset)
{
- struct inet_sock *inet = inet_sk(skb->sk);
+ struct inet_sock *inet = inet_sk(sk);
unsigned int flags = inet->cmsg_flags;
/* Ordered by supposed usage frequency */
@@ -218,6 +229,9 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
if (flags & IP_CMSG_CHECKSUM)
ip_cmsg_recv_checksum(msg, skb, tlen, offset);
+
+ if (flags & IP_CMSG_RECVFRAGSIZE)
+ ip_cmsg_recv_fragsize(msg, skb);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);
@@ -614,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_MULTICAST_LOOP:
case IP_RECVORIGDSTADDR:
case IP_CHECKSUM:
+ case IP_RECVFRAGSIZE:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
@@ -726,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
}
break;
+ case IP_RECVFRAGSIZE:
+ if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
+ goto e_inval;
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
+ break;
case IP_TOS: /* This sets both TOS and Precedence */
if (sk->sk_type == SOCK_STREAM) {
val &= ~INET_ECN_MASK;
@@ -1202,8 +1225,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
* which has interface index (iif) as the first member of the
* underlying inet{6}_skb_parm struct. This code then overlays
* PKTINFO_SKB_CB and in_pktinfo also has iif as the first
- * element so the iif is picked up from the prior IPCB
+ * element so the iif is picked up from the prior IPCB. If iif
+ * is the loopback interface, then return the sending interface
+ * (e.g., process binds socket to eth0 for Tx which is
+ * redirected to loopback in the rtable/dst).
*/
+ if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
+ pktinfo->ipi_ifindex = inet_iif(skb);
+
pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
} else {
pktinfo->ipi_ifindex = 0;
@@ -1357,6 +1386,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_CHECKSUM:
val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
break;
+ case IP_RECVFRAGSIZE:
+ val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
+ break;
case IP_TOS:
val = inet->tos;
break;
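
On the receive side, IP_RECVFRAGSIZE surfaces IPCB(skb)->frag_max_size, i.e. the largest fragment a reassembled datagram arrived in; nothing is reported for unfragmented traffic. A user-space sketch of consuming it (UDP socket, error handling elided):

	int on = 1, frag_size = 0;
	char buf[2048], cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
			      .msg_control = cbuf,
			      .msg_controllen = sizeof(cbuf) };
	struct cmsghdr *cmsg;

	setsockopt(fd, SOL_IP, IP_RECVFRAGSIZE, &on, sizeof(on));
	recvmsg(fd, &msg, 0);
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_IP &&
		    cmsg->cmsg_type == IP_RECVFRAGSIZE)
			frag_size = *(int *)CMSG_DATA(cmsg);
	/* frag_size stays 0 unless the datagram was reassembled */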
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5719d6ba0824..823abaef006b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -358,6 +358,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
{
struct ip_tunnel *nt;
struct net_device *dev;
+ int t_hlen;
BUG_ON(!itn->fb_tunnel_dev);
dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
@@ -367,6 +368,9 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
dev->mtu = ip_tunnel_bind_dev(dev);
nt = netdev_priv(dev);
+ t_hlen = nt->hlen + sizeof(struct iphdr);
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
ip_tunnel_add(itn, nt);
return nt;
}
@@ -929,7 +933,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
- if (new_mtu < 68)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
if (new_mtu > max_mtu) {
@@ -990,7 +994,7 @@ int ip_tunnel_get_iflink(const struct net_device *dev)
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
-int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
+int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname)
{
struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
@@ -1192,7 +1196,7 @@ void ip_tunnel_uninit(struct net_device *dev)
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
/* Do least required initialization, rest of init is done in tunnel_init call */
-void ip_tunnel_setup(struct net_device *dev, int net_id)
+void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
tunnel->ip_tnl_net_id = net_id;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index fed3d29f9eb3..0fd1976ab63b 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -313,6 +313,7 @@ static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
.fill_encap = ip_tun_fill_encap_info,
.get_encap_size = ip_tun_encap_nlsize,
.cmp_encap = ip_tun_cmp_encap,
+ .owner = THIS_MODULE,
};
static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
@@ -403,6 +404,7 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
.fill_encap = ip6_tun_fill_encap_info,
.get_encap_size = ip6_tun_encap_nlsize,
.cmp_encap = ip_tun_cmp_encap,
+ .owner = THIS_MODULE,
};
void __init ip_tunnel_core_init(void)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5d7944f394d9..8b14f1404c8f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -46,7 +46,7 @@
static struct rtnl_link_ops vti_link_ops __read_mostly;
-static int vti_net_id __read_mostly;
+static unsigned int vti_net_id __read_mostly;
static int vti_tunnel_init(struct net_device *dev);
static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 071a785c65eb..fd9f34bbd740 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -61,7 +61,7 @@
#include <net/ipconfig.h>
#include <net/route.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/checksum.h>
#include <asm/processor.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c9392589c415..00d4229b6954 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -96,7 +96,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
@@ -121,7 +121,7 @@ static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-static int ipip_net_id __read_mostly;
+static unsigned int ipip_net_id __read_mostly;
static int ipip_tunnel_init(struct net_device *dev);
static struct rtnl_link_ops ipip_link_ops __read_mostly;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 27089f5ebbb1..efc1e76d4977 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -26,7 +26,7 @@
*
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
@@ -137,6 +137,9 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
.flags = FIB_LOOKUP_NOREF,
};
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi4_to_flowi(flp4));
+
err = fib_rules_lookup(net->ipv4.mr_rules_ops,
flowi4_to_flowi(flp4), 0, &arg);
if (err < 0)
@@ -163,7 +166,9 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
return -EINVAL;
}
- mrt = ipmr_get_table(rule->fr_net, rule->table);
+ arg->table = fib_rule_get_table(rule, arg);
+
+ mrt = ipmr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -1809,6 +1814,12 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
/* Wrong interface: drop packet and (maybe) send PIM assert. */
if (mrt->vif_table[vif].dev != skb->dev) {
+ struct net_device *mdev;
+
+ mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
+ if (mdev == skb->dev)
+ goto forward;
+
if (rt_is_output_route(skb_rtable(skb))) {
/* It is our own packet, looped back.
* Very complicated situation...
@@ -2053,7 +2064,7 @@ static int pim_rcv(struct sk_buff *skb)
goto drop;
pim = (struct pimreghdr *)skb_transport_header(skb);
- if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
+ if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
(pim->flags & PIM_NULL_REGISTER) ||
(ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
csum_fold(skb_checksum(skb, 0, skb->len, 0))))
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index d613309e3e5d..c11eb1744ab1 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4
To compile it as a module, choose M here. If unsure, say N.
+config NF_SOCKET_IPV4
+ tristate "IPv4 socket lookup support"
+ help
+ This option enables the IPv4 socket lookup infrastructure. This
+ is required by the iptables socket match.
+
if NF_TABLES
config NF_TABLES_IPV4
@@ -54,6 +60,14 @@ config NFT_DUP_IPV4
help
This module enables IPv4 packet duplication support for nf_tables.
+config NFT_FIB_IPV4
+ select NFT_FIB
+ tristate "nf_tables fib / ip route lookup support"
+ help
+ This module enables IPv4 FIB lookups, e.g. for reverse path filtering.
+ It also allows querying the FIB for the route type, e.g. local, unicast,
+ multicast or blackhole.
+
endif # NF_TABLES_IPV4
config NF_TABLES_ARP
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 853328f8fd05..f462fee66ac8 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
+obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
+
# logging
obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
@@ -34,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 697538464e6e..a467e1236c43 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -24,7 +24,7 @@
#include <linux/err.h>
#include <net/compat.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp/arp_tables.h>
@@ -217,11 +217,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
*/
e = get_entry(table_base, private->hook_entry[hook]);
- acpar.net = state->net;
- acpar.in = state->in;
- acpar.out = state->out;
- acpar.hooknum = hook;
- acpar.family = NFPROTO_ARP;
+ acpar.state = state;
acpar.hotdrop = false;
arp = arp_hdr(skb);
@@ -415,17 +411,15 @@ static inline int check_target(struct arpt_entry *e, const char *name)
}
static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+ struct xt_percpu_counter_alloc_state *alloc_state)
{
struct xt_entry_target *t;
struct xt_target *target;
- unsigned long pcnt;
int ret;
- pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(pcnt))
+ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
return -ENOMEM;
- e->counters.pcnt = pcnt;
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
@@ -443,7 +437,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
err:
module_put(t->u.kernel.target->me);
out:
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
return ret;
}
@@ -523,7 +517,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
}
/* Checks and translates the user-supplied table segment (held in
@@ -532,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
static int translate_table(struct xt_table_info *newinfo, void *entry0,
const struct arpt_replace *repl)
{
+ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
struct arpt_entry *iter;
unsigned int *offsets;
unsigned int i;
@@ -594,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
- ret = find_check_entry(iter, repl->name, repl->size);
+ ret = find_check_entry(iter, repl->name, repl->size,
+ &alloc_state);
if (ret != 0)
break;
++i;
@@ -809,7 +805,7 @@ static int get_info(struct net *net, void __user *user,
#endif
t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
"arptable_%s", name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
struct arpt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -838,7 +834,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(NFPROTO_ARP);
@@ -863,7 +859,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
const struct xt_table_info *private = t->private;
if (get.size == private->size)
@@ -875,7 +871,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
return ret;
}
@@ -902,8 +898,8 @@ static int __do_replace(struct net *net, const char *name,
t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
"arptable_%s", name);
- if (IS_ERR_OR_NULL(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
+ if (!t) {
+ ret = -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1018,8 +1014,8 @@ static int do_add_counters(struct net *net, const void __user *user,
return PTR_ERR(paddc);
t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
- if (IS_ERR_OR_NULL(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
+ if (!t) {
+ ret = -ENOENT;
goto free;
}
@@ -1408,7 +1404,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
@@ -1423,7 +1419,7 @@ static int compat_get_entries(struct net *net,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
xt_compat_unlock(NFPROTO_ARP);
return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7c00ce90adb8..91656a1d8fbd 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -20,7 +20,7 @@
#include <linux/icmp.h>
#include <net/ip.h>
#include <net/compat.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
@@ -261,11 +261,7 @@ ipt_do_table(struct sk_buff *skb,
acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
acpar.thoff = ip_hdrlen(skb);
acpar.hotdrop = false;
- acpar.net = state->net;
- acpar.in = state->in;
- acpar.out = state->out;
- acpar.family = NFPROTO_IPV4;
- acpar.hooknum = hook;
+ acpar.state = state;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
local_bh_disable();
@@ -535,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
static int
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
- unsigned int size)
+ unsigned int size,
+ struct xt_percpu_counter_alloc_state *alloc_state)
{
struct xt_entry_target *t;
struct xt_target *target;
@@ -543,12 +540,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
- unsigned long pcnt;
- pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(pcnt))
+ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
return -ENOMEM;
- e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
@@ -586,7 +580,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
cleanup_match(ematch, net);
}
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
return ret;
}
@@ -674,7 +668,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
}
/* Checks and translates the user-supplied table segment (held in
@@ -683,6 +677,7 @@ static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ipt_replace *repl)
{
+ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
struct ipt_entry *iter;
unsigned int *offsets;
unsigned int i;
@@ -742,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
- ret = find_check_entry(iter, net, repl->name, repl->size);
+ ret = find_check_entry(iter, net, repl->name, repl->size,
+ &alloc_state);
if (ret != 0)
break;
++i;
@@ -977,7 +973,7 @@ static int get_info(struct net *net, void __user *user,
#endif
t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
"iptable_%s", name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
struct ipt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1007,7 +1003,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(AF_INET);
@@ -1032,7 +1028,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET, get.name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
const struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1043,7 +1039,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
return ret;
}
@@ -1068,8 +1064,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
"iptable_%s", name);
- if (IS_ERR_OR_NULL(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
+ if (!t) {
+ ret = -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1184,8 +1180,8 @@ do_add_counters(struct net *net, const void __user *user,
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET, tmp.name);
- if (IS_ERR_OR_NULL(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
+ if (!t) {
+ ret = -ENOENT;
goto free;
}
@@ -1630,7 +1626,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
@@ -1644,7 +1640,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
xt_compat_unlock(AF_INET);
return ret;
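
Both translate_table() conversions above thread an xt_percpu_counter_alloc_state through find_check_entry() so that per-entry counters are carved out of one large percpu chunk instead of issuing one __alloc_percpu() call per rule, which is dramatically cheaper for rulesets with thousands of entries. The helper behaves roughly as follows (a sketch, not the verbatim implementation):

	bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
				     struct xt_counters *counter)
	{
		if (nr_cpu_ids <= 1)
			return true;	/* !SMP: counters stay in the entry */

		if (!state->mem) {
			state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
						    XT_PCPU_BLOCK_SIZE);
			if (!state->mem)
				return false;
		}
		counter->pcnt = (__force unsigned long)(state->mem + state->off);
		state->off += sizeof(*counter);
		if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
			state->mem = NULL;	/* chunk exhausted */
			state->off = 0;
		}
		return true;
	}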
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4a9e6db9df8d..0a783cd73faf 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -62,7 +62,7 @@ struct clusterip_config {
static const struct file_operations clusterip_proc_fops;
#endif
-static int clusterip_net_id __read_mostly;
+static unsigned int clusterip_net_id __read_mostly;
struct clusterip_net {
struct list_head configs;
@@ -144,6 +144,11 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
rcu_read_lock_bh();
c = __clusterip_config_find(net, clusterip);
if (c) {
+#ifdef CONFIG_PROC_FS
+ if (!c->pde)
+ c = NULL;
+ else
+#endif
if (unlikely(!atomic_inc_not_zero(&c->refcount)))
c = NULL;
else if (entry)
@@ -166,14 +171,15 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
static struct clusterip_config *
clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
- struct net_device *dev)
+ struct net_device *dev)
{
+ struct net *net = dev_net(dev);
struct clusterip_config *c;
- struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
- return NULL;
+ return ERR_PTR(-ENOMEM);
c->dev = dev;
c->clusterip = ip;
@@ -185,6 +191,17 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
atomic_set(&c->refcount, 1);
atomic_set(&c->entries, 1);
+ spin_lock_bh(&cn->lock);
+ if (__clusterip_config_find(net, ip)) {
+ spin_unlock_bh(&cn->lock);
+ kfree(c);
+
+ return ERR_PTR(-EBUSY);
+ }
+
+ list_add_rcu(&c->list, &cn->configs);
+ spin_unlock_bh(&cn->lock);
+
#ifdef CONFIG_PROC_FS
{
char buffer[16];
@@ -195,16 +212,16 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
+ spin_lock_bh(&cn->lock);
+ list_del_rcu(&c->list);
+ spin_unlock_bh(&cn->lock);
kfree(c);
- return NULL;
+
+ return ERR_PTR(-ENOMEM);
}
}
#endif
- spin_lock_bh(&cn->lock);
- list_add_rcu(&c->list, &cn->configs);
- spin_unlock_bh(&cn->lock);
-
return c;
}
@@ -410,16 +427,16 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
config = clusterip_config_init(cipinfo,
e->ip.dst.s_addr, dev);
- if (!config) {
+ if (IS_ERR(config)) {
dev_put(dev);
- return -ENOMEM;
+ return PTR_ERR(config);
}
dev_mc_add(config->dev, config->clustermac);
}
}
cipinfo->config = config;
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -444,7 +461,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
clusterip_config_put(cipinfo->config);
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
#ifdef CONFIG_COMPAT
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index da7f02a0b868..a03e4e7ef5f9 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -41,7 +41,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
pr_debug("bad rangesize %u\n", mr->rangesize);
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
}
static unsigned int
@@ -55,7 +55,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
range.min_proto = mr->range[0].min;
range.max_proto = mr->range[0].max;
- return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out);
+ return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
+ xt_out(par));
+}
+
+static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_target masquerade_tg_reg __read_mostly = {
@@ -66,6 +72,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
.table = "nat",
.hooks = 1 << NF_INET_POST_ROUTING,
.checkentry = masquerade_tg_check,
+ .destroy = masquerade_tg_destroy,
.me = THIS_MODULE,
};
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 1d16c0f28df0..8bd0d7b26632 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -34,7 +34,7 @@ static unsigned int
reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ipt_reject_info *reject = par->targinfo;
- int hook = par->hooknum;
+ int hook = xt_hooknum(par);
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
@@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
break;
case IPT_TCP_RESET:
- nf_send_reset(par->net, skb, hook);
+ nf_send_reset(xt_net(par), skb, hook);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index db5b87509446..30c0de53e254 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -263,12 +263,12 @@ static unsigned int
synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_synproxy_info *info = par->targinfo;
- struct net *net = par->net;
+ struct net *net = xt_net(par);
struct synproxy_net *snet = synproxy_pernet(net);
struct synproxy_options opts = {};
struct tcphdr *th, _th;
- if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
return NF_DROP;
th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
@@ -418,12 +418,12 @@ static int synproxy_tg4_check(const struct xt_tgchk_param *par)
e->ip.invflags & XT_INV_PROTO)
return -EINVAL;
- return nf_ct_l3proto_try_module_get(par->family);
+ return nf_ct_netns_get(par->net, par->family);
}
static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_target synproxy_tg4_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 78cc64eddfc1..37fb9552e858 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -63,10 +63,10 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
return dev_match || flags & XT_RPFILTER_LOOSE;
}
-static bool rpfilter_is_local(const struct sk_buff *skb)
+static bool
+rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in)
{
- const struct rtable *rt = skb_rtable(skb);
- return rt && (rt->rt_flags & RTCF_LOCAL);
+ return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}
static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
@@ -79,14 +79,16 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
info = par->matchinfo;
invert = info->flags & XT_RPFILTER_INVERT;
- if (rpfilter_is_local(skb))
+ if (rpfilter_is_loopback(skb, xt_in(par)))
return true ^ invert;
iph = ip_hdr(skb);
- if (ipv4_is_multicast(iph->daddr)) {
- if (ipv4_is_zeronet(iph->saddr))
- return ipv4_is_local_multicast(iph->daddr) ^ invert;
+ if (ipv4_is_zeronet(iph->saddr)) {
+ if (ipv4_is_lbcast(iph->daddr) ||
+ ipv4_is_local_multicast(iph->daddr))
+ return true ^ invert;
}
+
flow.flowi4_iif = LOOPBACK_IFINDEX;
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
@@ -95,7 +97,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.flowi4_tos = RT_TOS(iph->tos);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
- return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert;
+ return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
}
static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 713c09a74b90..fcfd071f4705 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -31,6 +31,13 @@
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#include <net/netfilter/nf_log.h>
+static int conntrack4_net_id __read_mostly;
+static DEFINE_MUTEX(register_ipv4_hooks);
+
+struct conntrack4_net {
+ unsigned int users;
+};
+
static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
@@ -307,9 +314,42 @@ static struct nf_sockopt_ops so_getorigdst = {
.owner = THIS_MODULE,
};
-static int ipv4_init_net(struct net *net)
+static int ipv4_hooks_register(struct net *net)
{
- return 0;
+ struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
+ int err = 0;
+
+ mutex_lock(&register_ipv4_hooks);
+
+ cnet->users++;
+ if (cnet->users > 1)
+ goto out_unlock;
+
+ err = nf_defrag_ipv4_enable(net);
+ if (err) {
+ cnet->users = 0;
+ goto out_unlock;
+ }
+
+ err = nf_register_net_hooks(net, ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
+
+ if (err)
+ cnet->users = 0;
+ out_unlock:
+ mutex_unlock(&register_ipv4_hooks);
+ return err;
+}
+
+static void ipv4_hooks_unregister(struct net *net)
+{
+ struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
+
+ mutex_lock(&register_ipv4_hooks);
+ if (cnet->users && (--cnet->users == 0))
+ nf_unregister_net_hooks(net, ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
+ mutex_unlock(&register_ipv4_hooks);
}
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
@@ -325,7 +365,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
#endif
- .init_net = ipv4_init_net,
+ .net_ns_get = ipv4_hooks_register,
+ .net_ns_put = ipv4_hooks_unregister,
.me = THIS_MODULE,
};
@@ -336,52 +377,50 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
+static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
+ &nf_conntrack_l4proto_tcp4,
+ &nf_conntrack_l4proto_udp4,
+ &nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ &nf_conntrack_l4proto_dccp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ &nf_conntrack_l4proto_sctp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ &nf_conntrack_l4proto_udplite4,
+#endif
+};
+
static int ipv4_net_init(struct net *net)
{
int ret = 0;
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
- if (ret < 0) {
- pr_err("nf_conntrack_tcp4: pernet registration failed\n");
- goto out_tcp;
- }
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
- if (ret < 0) {
- pr_err("nf_conntrack_udp4: pernet registration failed\n");
- goto out_udp;
- }
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
- if (ret < 0) {
- pr_err("nf_conntrack_icmp4: pernet registration failed\n");
- goto out_icmp;
- }
+ ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
+ if (ret < 0)
+ return ret;
ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: pernet registration failed\n");
- goto out_ipv4;
+ nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
}
- return 0;
-out_ipv4:
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
-out_icmp:
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
-out_udp:
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
-out_tcp:
return ret;
}
static void ipv4_net_exit(struct net *net)
{
nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+ nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
}
static struct pernet_operations ipv4_net_ops = {
.init = ipv4_net_init,
.exit = ipv4_net_exit,
+ .id = &conntrack4_net_id,
+ .size = sizeof(struct conntrack4_net),
};
static int __init nf_conntrack_l3proto_ipv4_init(void)
@@ -389,7 +428,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
int ret = 0;
need_conntrack();
- nf_defrag_ipv4_enable();
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
@@ -403,46 +441,21 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
goto cleanup_sockopt;
}
- ret = nf_register_hooks(ipv4_conntrack_ops,
- ARRAY_SIZE(ipv4_conntrack_ops));
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: can't register hooks.\n");
+ ret = nf_ct_l4proto_register(builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
+ if (ret < 0)
goto cleanup_pernet;
- }
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
- goto cleanup_hooks;
- }
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
- goto cleanup_tcp4;
- }
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
- goto cleanup_udp4;
- }
ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
- goto cleanup_icmpv4;
+ goto cleanup_l4proto;
}
return ret;
- cleanup_icmpv4:
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
- cleanup_udp4:
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
- cleanup_tcp4:
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
- cleanup_hooks:
- nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+cleanup_l4proto:
+ nf_ct_l4proto_unregister(builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
cleanup_pernet:
unregister_pernet_subsys(&ipv4_net_ops);
cleanup_sockopt:
@@ -454,10 +467,8 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
{
synchronize_net();
nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
- nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+ nf_ct_l4proto_unregister(builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
unregister_pernet_subsys(&ipv4_net_ops);
nf_unregister_sockopt(&so_getorigdst);
}
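
The ipv4_hooks_register()/ipv4_hooks_unregister() pair above is an instance of a common pattern: a per-namespace use count protected by a mutex, so the conntrack hooks are installed when the first user in a netns appears and removed when the last one goes away. A generic sketch of that pattern, with illustrative names:

#include <linux/mutex.h>

struct hook_users {
	struct mutex lock;
	unsigned int users;
};

static int hooks_get(struct hook_users *h, int (*do_register)(void))
{
	int err = 0;

	mutex_lock(&h->lock);
	if (h->users++ == 0) {
		err = do_register();
		if (err)
			h->users = 0;	/* roll back on failure */
	}
	mutex_unlock(&h->lock);
	return err;
}

static void hooks_put(struct hook_users *h, void (*do_unregister)(void))
{
	mutex_lock(&h->lock);
	if (h->users && --h->users == 0)
		do_unregister();
	mutex_unlock(&h->lock);
}
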
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index d88da36b383c..49bd6a54404f 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -11,6 +11,7 @@
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <net/netns/generic.h>
#include <net/route.h>
#include <net/ip.h>
@@ -22,6 +23,8 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
+static DEFINE_MUTEX(defrag4_mutex);
+
static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
u_int32_t user)
{
@@ -102,18 +105,50 @@ static struct nf_hook_ops ipv4_defrag_ops[] = {
},
};
+static void __net_exit defrag4_net_exit(struct net *net)
+{
+ if (net->nf.defrag_ipv4) {
+ nf_unregister_net_hooks(net, ipv4_defrag_ops,
+ ARRAY_SIZE(ipv4_defrag_ops));
+ net->nf.defrag_ipv4 = false;
+ }
+}
+
+static struct pernet_operations defrag4_net_ops = {
+ .exit = defrag4_net_exit,
+};
+
static int __init nf_defrag_init(void)
{
- return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops));
+ return register_pernet_subsys(&defrag4_net_ops);
}
static void __exit nf_defrag_fini(void)
{
- nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops));
+ unregister_pernet_subsys(&defrag4_net_ops);
}
-void nf_defrag_ipv4_enable(void)
+int nf_defrag_ipv4_enable(struct net *net)
{
+ int err = 0;
+
+ might_sleep();
+
+ if (net->nf.defrag_ipv4)
+ return 0;
+
+ mutex_lock(&defrag4_mutex);
+ if (net->nf.defrag_ipv4)
+ goto out_unlock;
+
+ err = nf_register_net_hooks(net, ipv4_defrag_ops,
+ ARRAY_SIZE(ipv4_defrag_ops));
+ if (err == 0)
+ net->nf.defrag_ipv4 = true;
+
+ out_unlock:
+ mutex_unlock(&defrag4_mutex);
+ return err;
}
EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
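
With nf_defrag_ipv4_enable() now taking a struct net and returning an error, callers request defragmentation per namespace and must handle failure instead of assuming a global enable. A hypothetical call site, for illustration only:

#include <net/netfilter/ipv4/nf_defrag_ipv4.h>

static int my_feature_net_init(struct net *net)
{
	int err;

	err = nf_defrag_ipv4_enable(net);	/* may sleep */
	if (err)
		return err;

	/* ... set up this feature's own per-netns state ... */
	return 0;
}
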
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index fd8220213afc..146d86105183 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
+ nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
+
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
ip4_dst_hoplimit(skb_dst(nskb)));
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
new file mode 100644
index 000000000000..a83d558e1aae
--- /dev/null
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/netfilter/nf_socket.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static int
+extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol,
+ __be32 *raddr, __be32 *laddr,
+ __be16 *rport, __be16 *lport)
+{
+ unsigned int outside_hdrlen = ip_hdrlen(skb);
+ struct iphdr *inside_iph, _inside_iph;
+ struct icmphdr *icmph, _icmph;
+ __be16 *ports, _ports[2];
+
+ icmph = skb_header_pointer(skb, outside_hdrlen,
+ sizeof(_icmph), &_icmph);
+ if (icmph == NULL)
+ return 1;
+
+ switch (icmph->type) {
+ case ICMP_DEST_UNREACH:
+ case ICMP_SOURCE_QUENCH:
+ case ICMP_REDIRECT:
+ case ICMP_TIME_EXCEEDED:
+ case ICMP_PARAMETERPROB:
+ break;
+ default:
+ return 1;
+ }
+
+ inside_iph = skb_header_pointer(skb, outside_hdrlen +
+ sizeof(struct icmphdr),
+ sizeof(_inside_iph), &_inside_iph);
+ if (inside_iph == NULL)
+ return 1;
+
+ if (inside_iph->protocol != IPPROTO_TCP &&
+ inside_iph->protocol != IPPROTO_UDP)
+ return 1;
+
+ ports = skb_header_pointer(skb, outside_hdrlen +
+ sizeof(struct icmphdr) +
+ (inside_iph->ihl << 2),
+ sizeof(_ports), &_ports);
+ if (ports == NULL)
+ return 1;
+
+ /* the inside IP packet is the one quoted from our side, thus
+ * its saddr is the local address */
+ *protocol = inside_iph->protocol;
+ *laddr = inside_iph->saddr;
+ *lport = ports[0];
+ *raddr = inside_iph->daddr;
+ *rport = ports[1];
+
+ return 0;
+}
+
+static struct sock *
+nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
+ const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ return inet_lookup(net, &tcp_hashinfo, skb, doff,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ case IPPROTO_UDP:
+ return udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ }
+ return NULL;
+}
+
+struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
+ const struct net_device *indev)
+{
+ __be32 uninitialized_var(daddr), uninitialized_var(saddr);
+ __be16 uninitialized_var(dport), uninitialized_var(sport);
+ const struct iphdr *iph = ip_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ u8 uninitialized_var(protocol);
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn const *ct;
+#endif
+ int doff = 0;
+
+ if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+ struct udphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
+ protocol = iph->protocol;
+ saddr = iph->saddr;
+ sport = hp->source;
+ daddr = iph->daddr;
+ dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = iph->protocol == IPPROTO_TCP ?
+ ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
+ ip_hdrlen(skb) + sizeof(*hp);
+
+ } else if (iph->protocol == IPPROTO_ICMP) {
+ if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
+ &sport, &dport))
+ return NULL;
+ } else {
+ return NULL;
+ }
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ /* Do the lookup with the original socket address in
+ * case this is a reply packet of an established
+ * SNAT-ted connection.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && !nf_ct_is_untracked(ct) &&
+ ((iph->protocol != IPPROTO_ICMP &&
+ ctinfo == IP_CT_ESTABLISHED_REPLY) ||
+ (iph->protocol == IPPROTO_ICMP &&
+ ctinfo == IP_CT_RELATED_REPLY)) &&
+ (ct->status & IPS_SRC_NAT_DONE)) {
+
+ daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+ dport = (iph->protocol == IPPROTO_TCP) ?
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+ }
+#endif
+
+ return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
+ daddr, sport, dport, indev);
+}
+EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure");
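
A hypothetical consumer of the new helper, showing only the call shape; real users such as xt_socket add transparency and state checks on top. The sketch assumes the lookup returns a referenced socket that must be released with sock_gen_put():

#include <linux/netfilter.h>
#include <net/sock.h>
#include <net/netfilter/nf_socket.h>

static unsigned int my_hook(void *priv, struct sk_buff *skb,
			    const struct nf_hook_state *state)
{
	struct sock *sk;

	sk = nf_sk_lookup_slow_v4(state->net, skb, state->in);
	if (sk) {
		/* ... inspect the owning socket ... */
		sock_gen_put(sk);
	}
	return NF_ACCEPT;
}
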
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index 0c01a270bf9f..0af3d8df70dd 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
};
int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
- nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
+ nf_dup_ipv4(nft_net(pkt), pkt->skb, nft_hook(pkt), &gw, oif);
}
static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
new file mode 100644
index 000000000000..2981291910dd
--- /dev/null
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -0,0 +1,236 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 get_saddr(__be32 addr)
+{
+ if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+ ipv4_is_zeronet(addr))
+ return 0;
+ return addr;
+}
+
+#define DSCP_BITS 0xfc
+
+void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ u32 *dst = &regs->data[priv->dreg];
+ const struct net_device *dev = NULL;
+ const struct iphdr *iph;
+ __be32 addr;
+
+ if (priv->flags & NFTA_FIB_F_IIF)
+ dev = nft_in(pkt);
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ dev = nft_out(pkt);
+
+ iph = ip_hdr(pkt->skb);
+ if (priv->flags & NFTA_FIB_F_DADDR)
+ addr = iph->daddr;
+ else
+ addr = iph->saddr;
+
+ *dst = inet_dev_addr_type(nft_net(pkt), dev, addr);
+}
+EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
+
+static int get_ifindex(const struct net_device *dev)
+{
+ return dev ? dev->ifindex : 0;
+}
+
+void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ const struct iphdr *iph;
+ struct fib_result res;
+ struct flowi4 fl4 = {
+ .flowi4_scope = RT_SCOPE_UNIVERSE,
+ .flowi4_iif = LOOPBACK_IFINDEX,
+ };
+ const struct net_device *oif;
+ struct net_device *found;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ int i;
+#endif
+
+ /*
+ * Do not set flowi4_oif: it restricts results (for example, asking
+ * for oif 3 will get an RTN_UNICAST result even if the daddr exists
+ * on another interface).
+ *
+ * Search the results for the desired output interface instead.
+ */
+ if (priv->flags & NFTA_FIB_F_OIF)
+ oif = nft_out(pkt);
+ else if (priv->flags & NFTA_FIB_F_IIF)
+ oif = nft_in(pkt);
+ else
+ oif = NULL;
+
+ if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+ nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+ nft_fib_store_result(dest, priv->result, pkt,
+ nft_in(pkt)->ifindex);
+ return;
+ }
+
+ iph = ip_hdr(pkt->skb);
+ if (ipv4_is_zeronet(iph->saddr)) {
+ if (ipv4_is_lbcast(iph->daddr) ||
+ ipv4_is_local_multicast(iph->daddr)) {
+ nft_fib_store_result(dest, priv->result, pkt,
+ get_ifindex(pkt->skb->dev));
+ return;
+ }
+ }
+
+ if (priv->flags & NFTA_FIB_F_MARK)
+ fl4.flowi4_mark = pkt->skb->mark;
+
+ fl4.flowi4_tos = iph->tos & DSCP_BITS;
+
+ if (priv->flags & NFTA_FIB_F_DADDR) {
+ fl4.daddr = iph->daddr;
+ fl4.saddr = get_saddr(iph->saddr);
+ } else {
+ fl4.daddr = iph->saddr;
+ fl4.saddr = get_saddr(iph->daddr);
+ }
+
+ *dest = 0;
+
+ if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
+ return;
+
+ switch (res.type) {
+ case RTN_UNICAST:
+ break;
+ case RTN_LOCAL: /* Should not see RTN_LOCAL here */
+ return;
+ default:
+ break;
+ }
+
+ if (!oif) {
+ found = FIB_RES_DEV(res);
+ goto ok;
+ }
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ for (i = 0; i < res.fi->fib_nhs; i++) {
+ struct fib_nh *nh = &res.fi->fib_nh[i];
+
+ if (nh->nh_dev == oif) {
+ found = nh->nh_dev;
+ goto ok;
+ }
+ }
+ return;
+#else
+ found = FIB_RES_DEV(res);
+ if (found != oif)
+ return;
+#endif
+ok:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ *dest = found->ifindex;
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ strncpy((char *)dest, found->name, IFNAMSIZ);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nft_fib4_eval);
+
+static struct nft_expr_type nft_fib4_type;
+
+static const struct nft_expr_ops nft_fib4_type_ops = {
+ .type = &nft_fib4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib4_eval_type,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static const struct nft_expr_ops nft_fib4_ops = {
+ .type = &nft_fib4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib4_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static const struct nft_expr_ops *
+nft_fib4_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ enum nft_fib_result result;
+
+ if (!tb[NFTA_FIB_RESULT])
+ return ERR_PTR(-EINVAL);
+
+ result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+ switch (result) {
+ case NFT_FIB_RESULT_OIF:
+ return &nft_fib4_ops;
+ case NFT_FIB_RESULT_OIFNAME:
+ return &nft_fib4_ops;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return &nft_fib4_type_ops;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static struct nft_expr_type nft_fib4_type __read_mostly = {
+ .name = "fib",
+ .select_ops = &nft_fib4_select_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib4_module_init(void)
+{
+ return nft_register_expr(&nft_fib4_type);
+}
+
+static void __exit nft_fib4_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib4_type);
+}
+
+module_init(nft_fib4_module_init);
+module_exit(nft_fib4_module_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(2, "fib");
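
From userspace this expression is driven by nftables rules; the strict reverse-path filter, for example, is usually written along the lines of

	fib saddr . iif oif missing drop

which asks the FIB for the output interface of the reverse flow (source address plus input interface) and drops the packet when no route back exists. The rule syntax is quoted from common nftables usage and is not part of this patch.
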
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 51ced81b616c..a0ea8aad1bf1 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -31,8 +31,14 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
range.max_proto.all =
*(__be16 *)&regs->data[priv->sreg_proto_max];
}
- regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook,
- &range, pkt->out);
+ regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
+ &range, nft_out(pkt));
+}
+
+static void
+nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
}
static struct nft_expr_type nft_masq_ipv4_type;
@@ -41,6 +47,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
.eval = nft_masq_ipv4_eval,
.init = nft_masq_init,
+ .destroy = nft_masq_ipv4_destroy,
.dump = nft_masq_dump,
.validate = nft_masq_validate,
};
@@ -77,5 +84,5 @@ module_init(nft_masq_ipv4_module_init);
module_exit(nft_masq_ipv4_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index c09d4381427e..1650ed23c15d 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -35,8 +35,13 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
mr.range[0].flags |= priv->flags;
- regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr,
- pkt->hook);
+ regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
+}
+
+static void
+nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
}
static struct nft_expr_type nft_redir_ipv4_type;
@@ -45,6 +50,7 @@ static const struct nft_expr_ops nft_redir_ipv4_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
.eval = nft_redir_ipv4_eval,
.init = nft_redir_init,
+ .destroy = nft_redir_ipv4_destroy,
.dump = nft_redir_dump,
.validate = nft_redir_validate,
};
@@ -72,5 +78,5 @@ module_init(nft_redir_ipv4_module_init);
module_exit(nft_redir_ipv4_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index 2c2553b9026c..517ce93699de 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,10 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook);
+ nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset(pkt->net, pkt->skb, pkt->hook);
+ nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt));
break;
default:
break;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 96b8e2b95731..86cca610f4c2 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -609,15 +609,15 @@ int ping_getfrag(void *from, char *to,
fraglen -= sizeof(struct icmphdr);
if (fraglen < 0)
BUG();
- if (csum_and_copy_from_iter(to + sizeof(struct icmphdr),
+ if (!csum_and_copy_from_iter_full(to + sizeof(struct icmphdr),
fraglen, &pfh->wcheck,
- &pfh->msg->msg_iter) != fraglen)
+ &pfh->msg->msg_iter))
return -EFAULT;
} else if (offset < sizeof(struct icmphdr)) {
BUG();
} else {
- if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck,
- &pfh->msg->msg_iter) != fraglen)
+ if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck,
+ &pfh->msg->msg_iter))
return -EFAULT;
}
@@ -793,7 +793,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
- inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
+ inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
+ sk->sk_uid);
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ecbe5a7c2d6d..4e49e5cb001c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -41,7 +41,7 @@
#include <linux/atomic.h>
#include <asm/byteorder.h>
#include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <linux/stddef.h>
#include <linux/slab.h>
@@ -89,9 +89,10 @@ struct raw_frag_vec {
int hlen;
};
-static struct raw_hashinfo raw_v4_hashinfo = {
+struct raw_hashinfo raw_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
};
+EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
int raw_hash_sk(struct sock *sk)
{
@@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk)
}
EXPORT_SYMBOL_GPL(raw_unhash_sk);
-static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
+struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
unsigned short num, __be32 raddr, __be32 laddr, int dif)
{
sk_for_each_from(sk) {
@@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
found:
return sk;
}
+EXPORT_SYMBOL_GPL(__raw_v4_lookup);
/*
* 0 - deliver
@@ -604,7 +606,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
- daddr, saddr, 0, 0);
+ daddr, saddr, 0, 0, sk->sk_uid);
if (!inet->hdrincl) {
rfv.msg = msg;
@@ -693,12 +695,20 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ u32 tb_id = RT_TABLE_LOCAL;
int ret = -EINVAL;
int chk_addr_ret;
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
+
+ if (sk->sk_bound_dev_if)
+ tb_id = l3mdev_fib_table_by_index(sock_net(sk),
+ sk->sk_bound_dev_if) ? : tb_id;
+
+ chk_addr_ret = inet_addr_type_table(sock_net(sk), addr->sin_addr.s_addr,
+ tb_id);
+
ret = -EADDRNOTAVAIL;
if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -912,6 +922,20 @@ static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg
}
#endif
+int raw_abort(struct sock *sk, int err)
+{
+ lock_sock(sk);
+
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ __udp_disconnect(sk, 0);
+
+ release_sock(sk);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(raw_abort);
+
struct proto raw_prot = {
.name = "RAW",
.owner = THIS_MODULE,
@@ -937,6 +961,7 @@ struct proto raw_prot = {
.compat_getsockopt = compat_raw_getsockopt,
.compat_ioctl = compat_raw_ioctl,
#endif
+ .diag_destroy = raw_abort,
};
#ifdef CONFIG_PROC_FS
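
Implementing .diag_destroy makes raw sockets reachable from SOCK_DESTROY requests over sock_diag (the mechanism behind ss --kill). The core dispatch in net/core/sock_diag.c looks roughly like this, ending up in raw_abort() for raw sockets:

int sock_diag_destroy(struct sock *sk, int err)
{
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!sk->sk_prot->diag_destroy)
		return -EOPNOTSUPP;

	return sk->sk_prot->diag_destroy(sk, err);	/* -> raw_abort() */
}
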
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
new file mode 100644
index 000000000000..e1a51ca68d23
--- /dev/null
+++ b/net/ipv4/raw_diag.c
@@ -0,0 +1,266 @@
+#include <linux/module.h>
+
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+
+#include <net/inet_sock.h>
+#include <net/raw.h>
+#include <net/rawv6.h>
+
+#ifdef pr_fmt
+# undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+static struct raw_hashinfo *
+raw_get_hashinfo(const struct inet_diag_req_v2 *r)
+{
+ if (r->sdiag_family == AF_INET) {
+ return &raw_v4_hashinfo;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (r->sdiag_family == AF_INET6) {
+ return &raw_v6_hashinfo;
+#endif
+ } else {
+ pr_warn_once("Unexpected inet family %d\n",
+ r->sdiag_family);
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EINVAL);
+ }
+}
+
+/*
+ * Since we must not break the user API, we can't simply rename the
+ * @pad field in struct inet_diag_req_v2; instead, a helper is used
+ * to interpret the overlaid layout.
+ */
+
+static struct sock *raw_lookup(struct net *net, struct sock *from,
+ const struct inet_diag_req_v2 *req)
+{
+ struct inet_diag_req_raw *r = (void *)req;
+ struct sock *sk = NULL;
+
+ if (r->sdiag_family == AF_INET)
+ sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
+ r->id.idiag_dst[0],
+ r->id.idiag_src[0],
+ r->id.idiag_if);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
+ (const struct in6_addr *)r->id.idiag_src,
+ (const struct in6_addr *)r->id.idiag_dst,
+ r->id.idiag_if);
+#endif
+ return sk;
+}
+
+static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
+{
+ struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
+ struct sock *sk = NULL, *s;
+ int slot;
+
+ if (IS_ERR(hashinfo))
+ return ERR_CAST(hashinfo);
+
+ read_lock(&hashinfo->lock);
+ for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
+ sk_for_each(s, &hashinfo->ht[slot]) {
+ sk = raw_lookup(net, s, r);
+ if (sk) {
+ /*
+ * Grab a reference and hold it until the
+ * diag message has been filled in; the
+ * caller must call sock_put() afterwards.
+ * Taking the reference is safe because
+ * hashinfo->lock is held here.
+ */
+ sock_hold(sk);
+ goto out_unlock;
+ }
+ }
+ }
+out_unlock:
+ read_unlock(&hashinfo->lock);
+
+ return sk ? sk : ERR_PTR(-ENOENT);
+}
+
+static int raw_diag_dump_one(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh,
+ const struct inet_diag_req_v2 *r)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct sk_buff *rep;
+ struct sock *sk;
+ int err;
+
+ sk = raw_sock_get(net, r);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
+
+ rep = nlmsg_new(sizeof(struct inet_diag_msg) +
+ sizeof(struct inet_diag_meminfo) + 64,
+ GFP_KERNEL);
+ if (!rep) {
+ sock_put(sk);
+ return -ENOMEM;
+ }
+
+ err = inet_sk_diag_fill(sk, NULL, rep, r,
+ sk_user_ns(NETLINK_CB(in_skb).sk),
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0, nlh,
+ netlink_net_capable(in_skb, CAP_NET_ADMIN));
+ sock_put(sk);
+
+ if (err < 0) {
+ kfree_skb(rep);
+ return err;
+ }
+
+ err = netlink_unicast(net->diag_nlsk, rep,
+ NETLINK_CB(in_skb).portid,
+ MSG_DONTWAIT);
+ if (err > 0)
+ err = 0;
+ return err;
+}
+
+static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r,
+ struct nlattr *bc, bool net_admin)
+{
+ if (!inet_diag_bc_sk(bc, sk))
+ return 0;
+
+ return inet_sk_diag_fill(sk, NULL, skb, r,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh, net_admin);
+}
+
+static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r, struct nlattr *bc)
+{
+ bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
+ struct net *net = sock_net(skb->sk);
+ int num, s_num, slot, s_slot;
+ struct sock *sk = NULL;
+
+ if (IS_ERR(hashinfo))
+ return;
+
+ s_slot = cb->args[0];
+ num = s_num = cb->args[1];
+
+ read_lock(&hashinfo->lock);
+ for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
+ num = 0;
+
+ sk_for_each(sk, &hashinfo->ht[slot]) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (num < s_num)
+ goto next;
+ if (sk->sk_family != r->sdiag_family)
+ goto next;
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next;
+ if (r->id.idiag_dport != inet->inet_dport &&
+ r->id.idiag_dport)
+ goto next;
+ if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
+ goto out_unlock;
+next:
+ num++;
+ }
+ }
+
+out_unlock:
+ read_unlock(&hashinfo->lock);
+
+ cb->args[0] = slot;
+ cb->args[1] = num;
+}
+
+static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+ void *info)
+{
+ r->idiag_rqueue = sk_rmem_alloc_get(sk);
+ r->idiag_wqueue = sk_wmem_alloc_get(sk);
+}
+
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int raw_diag_destroy(struct sk_buff *in_skb,
+ const struct inet_diag_req_v2 *r)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct sock *sk;
+ int err;
+
+ sk = raw_sock_get(net, r);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
+ err = sock_diag_destroy(sk, ECONNABORTED);
+ sock_put(sk);
+ return err;
+}
+#endif
+
+static const struct inet_diag_handler raw_diag_handler = {
+ .dump = raw_diag_dump,
+ .dump_one = raw_diag_dump_one,
+ .idiag_get_info = raw_diag_get_info,
+ .idiag_type = IPPROTO_RAW,
+ .idiag_info_size = 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+ .destroy = raw_diag_destroy,
+#endif
+};
+
+static void __always_unused __check_inet_diag_req_raw(void)
+{
+ /*
+ * Make sure the two structures are identical,
+ * except for the @pad field.
+ */
+#define __offset_mismatch(m1, m2) \
+ (offsetof(struct inet_diag_req_v2, m1) != \
+ offsetof(struct inet_diag_req_raw, m2))
+
+ BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) !=
+ sizeof(struct inet_diag_req_raw));
+ BUILD_BUG_ON(__offset_mismatch(sdiag_family, sdiag_family));
+ BUILD_BUG_ON(__offset_mismatch(sdiag_protocol, sdiag_protocol));
+ BUILD_BUG_ON(__offset_mismatch(idiag_ext, idiag_ext));
+ BUILD_BUG_ON(__offset_mismatch(pad, sdiag_raw_protocol));
+ BUILD_BUG_ON(__offset_mismatch(idiag_states, idiag_states));
+ BUILD_BUG_ON(__offset_mismatch(id, id));
+#undef __offset_mismatch
+}
+
+static int __init raw_diag_init(void)
+{
+ return inet_diag_register(&raw_diag_handler);
+}
+
+static void __exit raw_diag_exit(void)
+{
+ inet_diag_unregister(&raw_diag_handler);
+}
+
+module_init(raw_diag_init);
+module_exit(raw_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
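
A hedged userspace sketch of the @pad aliasing: since struct inet_diag_req_v2 cannot grow without breaking the ABI, the raw protocol selector rides in the old pad byte. A request matching ICMP raw sockets might be prepared like this (netlink socket plumbing omitted):

#include <string.h>
#include <netinet/in.h>
#include <linux/inet_diag.h>

static void build_raw_req(struct inet_diag_req_v2 *req)
{
	memset(req, 0, sizeof(*req));
	req->sdiag_family = AF_INET;
	req->sdiag_protocol = IPPROTO_RAW;	/* selects raw_diag_handler */
	req->pad = IPPROTO_ICMP;		/* aliased sdiag_raw_protocol */
	req->idiag_states = -1;			/* match all states */
}
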
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2a57566e6e91..709ffe67d1de 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -65,7 +65,7 @@
#define pr_fmt(fmt) "IPv4: " fmt
#include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
}
EXPORT_SYMBOL(__ip_select_ident);
-static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
+ const struct sock *sk,
const struct iphdr *iph,
int oif, u8 tos,
u8 prot, u32 mark, int flow_flags)
@@ -523,19 +524,21 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
flowi4_init_output(fl4, oif, mark, tos,
RT_SCOPE_UNIVERSE, prot,
flow_flags,
- iph->daddr, iph->saddr, 0, 0);
+ iph->daddr, iph->saddr, 0, 0,
+ sock_net_uid(net, sk));
}
static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
const struct sock *sk)
{
+ const struct net *net = dev_net(skb->dev);
const struct iphdr *iph = ip_hdr(skb);
int oif = skb->dev->ifindex;
u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
u32 mark = skb->mark;
- __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
+ __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
@@ -552,7 +555,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
- daddr, inet->inet_saddr, 0, 0);
+ daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
rcu_read_unlock();
}
@@ -795,6 +798,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
struct rtable *rt;
struct flowi4 fl4;
const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct net *net = dev_net(skb->dev);
int oif = skb->dev->ifindex;
u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
@@ -802,7 +806,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
rt = (struct rtable *) dst;
- __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
+ __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -1020,7 +1024,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
if (!mark)
mark = IP4_REPLY_MARK(net, skb->mark);
- __build_flow_key(&fl4, NULL, iph, oif,
+ __build_flow_key(net, &fl4, NULL, iph, oif,
RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
@@ -1036,7 +1040,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
struct flowi4 fl4;
struct rtable *rt;
- __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+ __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
if (!fl4.flowi4_mark)
fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
@@ -1055,6 +1059,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
struct rtable *rt;
struct dst_entry *odst = NULL;
bool new = false;
+ struct net *net = sock_net(sk);
bh_lock_sock(sk);
@@ -1068,7 +1073,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
goto out;
}
- __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+ __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
rt = (struct rtable *)odst;
if (odst->obsolete && !odst->ops->check(odst, 0)) {
@@ -1108,7 +1113,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
struct flowi4 fl4;
struct rtable *rt;
- __build_flow_key(&fl4, NULL, iph, oif,
+ __build_flow_key(net, &fl4, NULL, iph, oif,
RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
@@ -1123,9 +1128,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
+ struct net *net = sock_net(sk);
- __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
- rt = __ip_route_output_key(sock_net(sk), &fl4);
+ __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
+ rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_do_redirect(rt, skb, &fl4, false);
ip_rt_put(rt);
@@ -1598,6 +1604,19 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_unlock_bh(&fnhe_lock);
}
+static void set_lwt_redirect(struct rtable *rth)
+{
+ if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
+ rth->dst.lwtstate->orig_output = rth->dst.output;
+ rth->dst.output = lwtunnel_output;
+ }
+
+ if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
+ rth->dst.lwtstate->orig_input = rth->dst.input;
+ rth->dst.input = lwtunnel_input;
+ }
+}
+
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1687,14 +1706,7 @@ rt_cache:
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
- if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_output = rth->dst.output;
- rth->dst.output = lwtunnel_output;
- }
- if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_input = rth->dst.input;
- rth->dst.input = lwtunnel_input;
- }
+ set_lwt_redirect(rth);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1902,7 +1914,8 @@ local_input:
}
}
- rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
+ rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
+ flags | RTCF_LOCAL, res.type,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
if (!rth)
goto e_nobufs;
@@ -1921,8 +1934,18 @@ local_input:
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
+
if (do_cache) {
- if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) {
+ struct fib_nh *nh = &FIB_RES_NH(res);
+
+ rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
+ if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
+ WARN_ON(rth->dst.input == lwtunnel_input);
+ rth->dst.lwtstate->orig_input = rth->dst.input;
+ rth->dst.input = lwtunnel_input;
+ }
+
+ if (unlikely(!rt_cache_route(nh, rth))) {
rth->dst.flags |= DST_NOCACHE;
rt_add_uncached_list(rth);
}
@@ -1982,25 +2005,35 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
*/
if (ipv4_is_multicast(daddr)) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ int our = 0;
+
+ if (in_dev)
+ our = ip_check_mc_rcu(in_dev, daddr, saddr,
+ ip_hdr(skb)->protocol);
+
+ /* check l3 master if no match yet */
+ if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
+ struct in_device *l3_in_dev;
- if (in_dev) {
- int our = ip_check_mc_rcu(in_dev, daddr, saddr,
- ip_hdr(skb)->protocol);
- if (our
+ l3_in_dev = __in_dev_get_rcu(skb->dev);
+ if (l3_in_dev)
+ our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
+ ip_hdr(skb)->protocol);
+ }
+
+ res = -EINVAL;
+ if (our
#ifdef CONFIG_IP_MROUTE
- ||
- (!ipv4_is_local_multicast(daddr) &&
- IN_DEV_MFORWARD(in_dev))
+ ||
+ (!ipv4_is_local_multicast(daddr) &&
+ IN_DEV_MFORWARD(in_dev))
#endif
- ) {
- int res = ip_route_input_mc(skb, daddr, saddr,
- tos, dev, our);
- rcu_read_unlock();
- return res;
- }
+ ) {
+ res = ip_route_input_mc(skb, daddr, saddr,
+ tos, dev, our);
}
rcu_read_unlock();
- return -EINVAL;
+ return res;
}
res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
rcu_read_unlock();
@@ -2140,8 +2173,7 @@ add:
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
- if (lwtunnel_output_redirect(rth->dst.lwtstate))
- rth->dst.output = lwtunnel_output;
+ set_lwt_redirect(rth);
return rth;
}
@@ -2268,7 +2300,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
res.fi = NULL;
res.table = NULL;
if (fl4->flowi4_oif &&
- !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+ (ipv4_is_multicast(fl4->daddr) ||
+ !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
/* Apparently, routing tables are wrong. Assume
that the destination is on link.
@@ -2439,7 +2472,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
r->rtm_tos = fl4->flowi4_tos;
- r->rtm_table = table_id;
+ r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, table_id))
goto nla_put_failure;
r->rtm_type = rt->rt_type;
@@ -2495,6 +2528,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
goto nla_put_failure;
+ if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
+ nla_put_u32(skb, RTA_UID,
+ from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
+ goto nla_put_failure;
+
error = rt->dst.error;
if (rt_is_input_route(rt)) {
@@ -2547,6 +2585,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
int mark;
struct sk_buff *skb;
u32 table_id = RT_TABLE_MAIN;
+ kuid_t uid;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
if (err < 0)
@@ -2574,6 +2613,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+ if (tb[RTA_UID])
+ uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
+ else
+ uid = (iif ? INVALID_UID : current_uid());
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
@@ -2581,6 +2624,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
fl4.flowi4_tos = rtm->rtm_tos;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
+ fl4.flowi4_uid = uid;
if (iif) {
struct net_device *dev;
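
The RTA_UID attribute added above lets RTM_GETROUTE ask which route a particular uid would get, matching the flowi4_uid plumbing through the output path. A hypothetical request built with libmnl (assuming a uapi rtnetlink.h that defines RTA_UID; names and attribute order are illustrative):

#include <libmnl/libmnl.h>
#include <linux/rtnetlink.h>

static struct nlmsghdr *build_getroute(char *buf, uint32_t dst_be, uint32_t uid)
{
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct rtmsg *rtm;

	nlh->nlmsg_type = RTM_GETROUTE;
	nlh->nlmsg_flags = NLM_F_REQUEST;

	rtm = mnl_nlmsg_put_extra_header(nlh, sizeof(*rtm));
	rtm->rtm_family = AF_INET;

	mnl_attr_put_u32(nlh, RTA_DST, dst_be);	/* network byte order */
	mnl_attr_put_u32(nlh, RTA_UID, uid);	/* route as this uid */
	return nlh;
}
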
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e3c4043c27de..3e88467d70ee 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
treq = tcp_rsk(req);
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
+ treq->ts_off = 0;
req->mss = mss;
ireq->ir_num = ntohs(th->dest);
ireq->ir_rmt_port = th->source;
@@ -372,7 +373,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
inet_sk_flowi_flags(sk),
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
- ireq->ir_loc_addr, th->source, th->dest);
+ ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 80bc36b25de2..b2fa498b15d1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -433,13 +433,6 @@ static struct ctl_table ipv4_table[] = {
.extra2 = &tcp_adv_win_scale_max,
},
{
- .procname = "tcp_tw_reuse",
- .data = &sysctl_tcp_tw_reuse,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_frto",
.data = &sysctl_tcp_frto,
.maxlen = sizeof(int),
@@ -958,7 +951,14 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_douintvec,
+ },
+ {
+ .procname = "tcp_tw_reuse",
+ .data = &init_net.ipv4.sysctl_tcp_tw_reuse,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
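
Moving the entry from ipv4_table to ipv4_net_table makes tcp_tw_reuse a per-namespace knob: each network namespace can now set net.ipv4.tcp_tw_reuse independently, e.g. with sysctl -w net.ipv4.tcp_tw_reuse=1 run inside the namespace, instead of sharing one global value.
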
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 814af89c1bd3..4a044964da66 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -277,9 +277,8 @@
#include <net/ip.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/ioctls.h>
-#include <asm/unaligned.h>
#include <net/busy_poll.h>
int sysctl_tcp_min_tso_segs __read_mostly = 2;
@@ -405,7 +404,6 @@ void tcp_init_sock(struct sock *sk)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
- u64_stats_init(&tp->syncp);
tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
tcp_enable_early_retrans(tp);
@@ -665,9 +663,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
if (tcp_should_autocork(sk, skb, size_goal)) {
/* avoid atomic op if TSQ_THROTTLED bit is already set */
- if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
+ if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
- set_bit(TSQ_THROTTLED, &tp->tsq_flags);
+ set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
}
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED.
@@ -998,8 +996,11 @@ do_error:
goto out;
out_err:
/* make sure we wake any epoll edge trigger waiter */
- if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+ err == -EAGAIN)) {
sk->sk_write_space(sk);
+ tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+ }
return sk_stream_error(sk, flags, err);
}
@@ -1333,8 +1334,11 @@ do_error:
out_err:
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
- if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+ err == -EAGAIN)) {
sk->sk_write_space(sk);
+ tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+ }
release_sock(sk);
return err;
}
@@ -2302,7 +2306,7 @@ EXPORT_SYMBOL(tcp_disconnect);
static inline bool tcp_can_repair_sock(const struct sock *sk)
{
return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
- ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
+ (sk->sk_state != TCP_LISTEN);
}
static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
@@ -2704,15 +2708,33 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_setsockopt);
#endif
+static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
+ struct tcp_info *info)
+{
+ u64 stats[__TCP_CHRONO_MAX], total = 0;
+ enum tcp_chrono i;
+
+ for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
+ stats[i] = tp->chrono_stat[i - 1];
+ if (i == tp->chrono_type)
+ stats[i] += tcp_time_stamp - tp->chrono_start;
+ stats[i] *= USEC_PER_SEC / HZ;
+ total += stats[i];
+ }
+
+ info->tcpi_busy_time = total;
+ info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED];
+ info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED];
+}
+
/* Return information about state of tcp endpoint in API format. */
void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp, intv;
- unsigned int start;
- int notsent_bytes;
u64 rate64;
+ bool slow;
u32 rate;
memset(info, 0, sizeof(*info));
@@ -2721,6 +2743,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_state = sk_state_load(sk);
+ /* Report meaningful fields for all TCP states, including listeners */
+ rate = READ_ONCE(sk->sk_pacing_rate);
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ info->tcpi_pacing_rate = rate64;
+
+ rate = READ_ONCE(sk->sk_max_pacing_rate);
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ info->tcpi_max_pacing_rate = rate64;
+
+ info->tcpi_reordering = tp->reordering;
+ info->tcpi_snd_cwnd = tp->snd_cwnd;
+
+ if (info->tcpi_state == TCP_LISTEN) {
+ /* listeners' aliased fields:
+ * tcpi_unacked -> number of children ready for accept()
+ * tcpi_sacked -> max backlog
+ */
+ info->tcpi_unacked = sk->sk_ack_backlog;
+ info->tcpi_sacked = sk->sk_max_ack_backlog;
+ return;
+ }
info->tcpi_ca_state = icsk->icsk_ca_state;
info->tcpi_retransmits = icsk->icsk_retransmits;
info->tcpi_probes = icsk->icsk_probes_out;
@@ -2748,13 +2791,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_snd_mss = tp->mss_cache;
info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
- if (info->tcpi_state == TCP_LISTEN) {
- info->tcpi_unacked = sk->sk_ack_backlog;
- info->tcpi_sacked = sk->sk_max_ack_backlog;
- } else {
- info->tcpi_unacked = tp->packets_out;
- info->tcpi_sacked = tp->sacked_out;
- }
+ info->tcpi_unacked = tp->packets_out;
+ info->tcpi_sacked = tp->sacked_out;
+
info->tcpi_lost = tp->lost_out;
info->tcpi_retrans = tp->retrans_out;
info->tcpi_fackets = tp->fackets_out;
@@ -2768,34 +2807,25 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rtt = tp->srtt_us >> 3;
info->tcpi_rttvar = tp->mdev_us >> 2;
info->tcpi_snd_ssthresh = tp->snd_ssthresh;
- info->tcpi_snd_cwnd = tp->snd_cwnd;
info->tcpi_advmss = tp->advmss;
- info->tcpi_reordering = tp->reordering;
info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
info->tcpi_rcv_space = tp->rcvq_space.space;
info->tcpi_total_retrans = tp->total_retrans;
- rate = READ_ONCE(sk->sk_pacing_rate);
- rate64 = rate != ~0U ? rate : ~0ULL;
- put_unaligned(rate64, &info->tcpi_pacing_rate);
+ slow = lock_sock_fast(sk);
- rate = READ_ONCE(sk->sk_max_pacing_rate);
- rate64 = rate != ~0U ? rate : ~0ULL;
- put_unaligned(rate64, &info->tcpi_max_pacing_rate);
+ info->tcpi_bytes_acked = tp->bytes_acked;
+ info->tcpi_bytes_received = tp->bytes_received;
+ info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
+ tcp_get_info_chrono_stats(tp, info);
+
+ unlock_sock_fast(sk, slow);
- do {
- start = u64_stats_fetch_begin_irq(&tp->syncp);
- put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
- put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
- } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
info->tcpi_segs_out = tp->segs_out;
info->tcpi_segs_in = tp->segs_in;
- notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
- info->tcpi_notsent_bytes = max(0, notsent_bytes);
-
info->tcpi_min_rtt = tcp_min_rtt(tp);
info->tcpi_data_segs_in = tp->data_segs_in;
info->tcpi_data_segs_out = tp->data_segs_out;
@@ -2806,11 +2836,31 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
if (rate && intv) {
rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
do_div(rate64, intv);
- put_unaligned(rate64, &info->tcpi_delivery_rate);
+ info->tcpi_delivery_rate = rate64;
}
}
EXPORT_SYMBOL_GPL(tcp_get_info);
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *stats;
+ struct tcp_info info;
+
+ stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+ if (!stats)
+ return NULL;
+
+ tcp_get_info_chrono_stats(tp, &info);
+ nla_put_u64_64bit(stats, TCP_NLA_BUSY,
+ info.tcpi_busy_time, TCP_NLA_PAD);
+ nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
+ info.tcpi_rwnd_limited, TCP_NLA_PAD);
+ nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
+ info.tcpi_sndbuf_limited, TCP_NLA_PAD);
+ return stats;
+}
+
static int do_tcp_getsockopt(struct sock *sk, int level,
int optname, char __user *optval, int __user *optlen)
{
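
The chrono counters computed by tcp_get_info_chrono_stats() reach userspace both through the new timestamping stats attribute and through TCP_INFO. A hedged sketch of reading them with getsockopt() on a connected TCP socket, assuming a uapi linux/tcp.h recent enough to carry the tcpi_busy_time fields:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>

static void dump_chrono_stats(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	memset(&ti, 0, sizeof(ti));
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len))
		return;

	printf("busy %llu us, rwnd limited %llu us, sndbuf limited %llu us\n",
	       (unsigned long long)ti.tcpi_busy_time,
	       (unsigned long long)ti.tcpi_rwnd_limited,
	       (unsigned long long)ti.tcpi_sndbuf_limited);
}
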
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 0ea66c2c9344..b89bce4c721e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -14,6 +14,36 @@
* observed, or adjust the sending rate if it estimates there is a
* traffic policer, in order to keep the drop rate reasonable.
*
+ * Here is a state transition diagram for BBR:
+ *
+ * |
+ * V
+ * +---> STARTUP ----+
+ * | | |
+ * | V |
+ * | DRAIN ----+
+ * | | |
+ * | V |
+ * +---> PROBE_BW ----+
+ * | ^ | |
+ * | | | |
+ * | +----+ |
+ * | |
+ * +---- PROBE_RTT <--+
+ *
+ * A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
+ * When it estimates the pipe is full, it enters DRAIN to drain the queue.
+ * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
+ * A long-lived BBR flow spends the vast majority of its time remaining
+ * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
+ * in a fair manner, with a small, bounded queue. *If* a flow has been
+ * continuously sending for the entire min_rtt window, and hasn't seen an RTT
+ * sample that matches or decreases its min_rtt estimate for 10 seconds, then
+ * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
+ * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
+ * we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
+ * otherwise we enter STARTUP to try to fill the pipe.
+ *
* BBR is described in detail in:
* "BBR: Congestion-Based Congestion Control",
* Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
@@ -51,7 +81,7 @@ enum bbr_mode {
BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */
BBR_DRAIN, /* drain any queue created during startup */
BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */
- BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */
+ BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */
};
/* BBR congestion control block */
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index f9038d6b109e..79c4817abc94 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -68,8 +68,9 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
{
int ret = 0;
- /* all algorithms must implement ssthresh and cong_avoid ops */
- if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) {
+ /* all algorithms must implement these */
+ if (!ca->ssthresh || !ca->undo_cwnd ||
+ !(ca->cong_avoid || ca->cong_control)) {
pr_err("%s does not implement required ops\n", ca->name);
return -EINVAL;
}
@@ -443,10 +444,19 @@ u32 tcp_reno_ssthresh(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
+u32 tcp_reno_undo_cwnd(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+}
+EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
+
struct tcp_congestion_ops tcp_reno = {
.flags = TCP_CONG_NON_RESTRICTED,
.name = "reno",
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
+ .undo_cwnd = tcp_reno_undo_cwnd,
};
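
With undo_cwnd now mandatory, a congestion control module that fails to provide it is rejected with -EINVAL at registration time. A minimal skeleton (illustrative name) that reuses the exported Reno helpers:

#include <linux/module.h>
#include <net/tcp.h>

static struct tcp_congestion_ops my_cc __read_mostly = {
	.flags		= TCP_CONG_NON_RESTRICTED,
	.name		= "my_cc",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.undo_cwnd	= tcp_reno_undo_cwnd,	/* now required */
};

static int __init my_cc_register(void)
{
	return tcp_register_congestion_control(&my_cc);
}
module_init(my_cc_register);
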
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index ab37c6775630..5f5e5936760e 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -335,6 +335,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = {
static struct tcp_congestion_ops dctcp_reno __read_mostly = {
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.get_info = dctcp_get_info,
.owner = THIS_MODULE,
.name = "dctcp-reno",
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4e777a3243f9..dd2560c83a85 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
struct tcp_fastopen_cookie tmp;
if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
- struct in6_addr *buf = (struct in6_addr *) tmp.val;
+ struct in6_addr *buf = &tmp.addr;
int i;
for (i = 0; i < 4; i++)
@@ -205,6 +205,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
* scaled. So correct it appropriately.
*/
tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+ tp->max_window = tp->snd_wnd;
/* Activate the retrans timer so that SYNACK can be retransmitted.
* The request socket is not added to the ehash
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index db7842495a64..6d9879e93648 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -94,6 +94,7 @@ static const struct hstcp_aimd_val {
struct hstcp {
u32 ai;
+ u32 loss_cwnd;
};
static void hstcp_init(struct sock *sk)
@@ -150,16 +151,24 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
static u32 hstcp_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- const struct hstcp *ca = inet_csk_ca(sk);
+ struct hstcp *ca = inet_csk_ca(sk);
+ ca->loss_cwnd = tp->snd_cwnd;
/* Do multiplicative decrease */
return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
}
+static u32 hstcp_cwnd_undo(struct sock *sk)
+{
+ const struct hstcp *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
.init = hstcp_init,
.ssthresh = hstcp_ssthresh,
+ .undo_cwnd = hstcp_cwnd_undo,
.cong_avoid = hstcp_cong_avoid,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 083831e359df..0f7175c3338e 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -166,6 +166,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
static struct tcp_congestion_ops tcp_hybla __read_mostly = {
.init = hybla_init,
.ssthresh = tcp_reno_ssthresh,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = hybla_cong_avoid,
.set_state = hybla_state,
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index c8e6d86be114..60352ff4f5a8 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -48,6 +48,7 @@ struct illinois {
u32 end_seq; /* right edge of current RTT */
u32 alpha; /* Additive increase */
u32 beta; /* Multiplicative decrease */
+ u32 loss_cwnd; /* cwnd on loss */
u16 acked; /* # packets acked by current ACK */
u8 rtt_above; /* average rtt has gone above threshold */
u8 rtt_low; /* # of rtts measurements below threshold */
@@ -296,10 +297,18 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
+ ca->loss_cwnd = tp->snd_cwnd;
/* Multiplicative decrease */
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
}
+static u32 tcp_illinois_cwnd_undo(struct sock *sk)
+{
+ const struct illinois *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
/* Extract info for Tcp socket info provided via netlink. */
static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
@@ -327,6 +336,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
.init = tcp_illinois_init,
.ssthresh = tcp_illinois_ssthresh,
+ .undo_cwnd = tcp_illinois_cwnd_undo,
.cong_avoid = tcp_illinois_cong_avoid,
.set_state = tcp_illinois_state,
.get_info = tcp_illinois_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c71d49ce0c93..41dcbd568cbe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -85,6 +85,7 @@ int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
+EXPORT_SYMBOL(sysctl_tcp_timestamps);
/* rfc5961 challenge ack rate limiting */
int sysctl_tcp_challenge_ack_limit = 1000;
@@ -2414,10 +2415,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (tp->prior_ssthresh) {
const struct inet_connection_sock *icsk = inet_csk(sk);
- if (icsk->icsk_ca_ops->undo_cwnd)
- tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
- else
- tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+ tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
if (tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
@@ -3201,6 +3199,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->lost_skb_hint = NULL;
}
+ if (!skb)
+ tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+
if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
tp->snd_up = tp->snd_una;
@@ -3371,9 +3372,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
u32 delta = ack - tp->snd_una;
sock_owned_by_me((struct sock *)tp);
- u64_stats_update_begin_raw(&tp->syncp);
tp->bytes_acked += delta;
- u64_stats_update_end_raw(&tp->syncp);
tp->snd_una = ack;
}
@@ -3383,9 +3382,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
u32 delta = seq - tp->rcv_nxt;
sock_owned_by_me((struct sock *)tp);
- u64_stats_update_begin_raw(&tp->syncp);
tp->bytes_received += delta;
- u64_stats_update_end_raw(&tp->syncp);
tp->rcv_nxt = seq;
}
@@ -5081,10 +5078,13 @@ static void tcp_check_space(struct sock *sk)
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
/* pairs with tcp_poll() */
- smp_mb__after_atomic();
+ smp_mb();
if (sk->sk_socket &&
- test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
tcp_new_space(sk);
+ if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+ tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+ }
}
}
@@ -6318,13 +6318,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop;
}
-
- /* Accept backlog is full. If we have already queued enough
- * of warm entries in syn queue, drop request. It is better than
- * clogging syn queue with openreqs with exponentially increasing
- * timeout.
- */
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+ if (sk_acceptq_is_full(sk)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
goto drop;
}
@@ -6334,6 +6328,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop;
tcp_rsk(req)->af_specific = af_ops;
+ tcp_rsk(req)->ts_off = 0;
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
@@ -6355,6 +6350,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
+ if (isn && tmp_opt.tstamp_ok)
+ af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+
if (!want_cookie && !isn) {
/* VJ's idea. We save last timestamp seen
* from the destination in peer table, when entering
@@ -6395,7 +6393,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_release;
}
- isn = af_ops->init_seq(skb);
+ isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
}
if (!dst) {
dst = af_ops->route_req(sk, &fl, req, NULL);
@@ -6407,6 +6405,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (want_cookie) {
isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
+ tcp_rsk(req)->ts_off = 0;
req->cookie_ts = tmp_opt.tstamp_ok;
if (!tmp_opt.tstamp_ok)
inet_rsk(req)->ecn_ok = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2259114c7242..fe9da4fb96bf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,7 +84,6 @@
#include <crypto/hash.h>
#include <linux/scatterlist.h>
-int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
#ifdef CONFIG_TCP_MD5SIG
@@ -95,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
-static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
+static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
{
return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr,
tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source);
+ tcp_hdr(skb)->source, tsoff);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
and use initial timestamp retrieved from peer table.
*/
if (tcptw->tw_ts_recent_stamp &&
- (!twp || (sysctl_tcp_tw_reuse &&
+ (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
@@ -237,7 +236,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
inet->inet_daddr,
inet->inet_sport,
- usin->sin_port);
+ usin->sin_port,
+ &tp->tsoffset);
inet->inet_id = tp->write_seq ^ jiffies;
@@ -442,7 +442,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
} else {
- if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+ if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
}
goto out;
@@ -691,6 +691,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
arg.tos = ip_hdr(skb)->tos;
+ arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -711,7 +712,7 @@ out:
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct net *net,
+static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
@@ -726,6 +727,7 @@ static void tcp_v4_send_ack(struct net *net,
#endif
];
} rep;
+ struct net *net = sock_net(sk);
struct ip_reply_arg arg;
memset(&rep.th, 0, sizeof(struct tcphdr));
@@ -775,6 +777,7 @@ static void tcp_v4_send_ack(struct net *net,
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
+ arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -790,7 +793,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(sock_net(sk), skb,
+ tcp_v4_send_ack(sk, skb,
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
@@ -818,10 +821,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
* exception of <SYN> segments, MUST be right-shifted by
* Rcv.Wind.Shift bits:
*/
- tcp_v4_send_ack(sock_net(sk), skb, seq,
+ tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp,
+ tcp_time_stamp + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1908,7 +1911,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
if (!sk) {
get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket];
- spin_lock_bh(&ilb->lock);
+ spin_lock(&ilb->lock);
sk = sk_head(&ilb->head);
st->offset = 0;
goto get_sk;
@@ -1925,7 +1928,7 @@ get_sk:
if (sk->sk_family == st->family)
return sk;
}
- spin_unlock_bh(&ilb->lock);
+ spin_unlock(&ilb->lock);
st->offset = 0;
if (++st->bucket < INET_LHTABLE_SIZE)
goto get_head;
@@ -2133,7 +2136,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
+ spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
break;
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
@@ -2452,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
+ net->ipv4.sysctl_tcp_tw_reuse = 0;
return 0;
fail:
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c67ece1390c2..046fd3910873 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -316,6 +316,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
static struct tcp_congestion_ops tcp_lp __read_mostly = {
.init = tcp_lp_init,
.ssthresh = tcp_reno_ssthresh,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_lp_cong_avoid,
.pkts_acked = tcp_lp_pkts_acked,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index bf1f3b2b29d1..ba8f02d0f283 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -606,7 +606,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
return ret;
}
-EXPORT_SYMBOL_GPL(tcp_peer_is_proven);
void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
{
@@ -742,14 +741,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
rcu_read_unlock();
}
-static struct genl_family tcp_metrics_nl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = TCP_METRICS_GENL_NAME,
- .version = TCP_METRICS_GENL_VERSION,
- .maxattr = TCP_METRICS_ATTR_MAX,
- .netnsok = true,
-};
+static struct genl_family tcp_metrics_nl_family;
static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
@@ -1116,6 +1108,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = {
},
};
+static struct genl_family tcp_metrics_nl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = TCP_METRICS_GENL_NAME,
+ .version = TCP_METRICS_GENL_VERSION,
+ .maxattr = TCP_METRICS_ATTR_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = tcp_metrics_nl_ops,
+ .n_ops = ARRAY_SIZE(tcp_metrics_nl_ops),
+};
+
static unsigned int tcpmhash_entries;
static int __init set_tcpmhash_entries(char *str)
{
@@ -1179,8 +1182,7 @@ void __init tcp_metrics_init(void)
if (ret < 0)
panic("Could not allocate the tcp_metrics hash table\n");
- ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
- tcp_metrics_nl_ops);
+ ret = genl_register_family(&tcp_metrics_nl_family);
if (ret < 0)
panic("Could not register tcp_metrics generic netlink\n");
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6234ebaa7db1..28ce5ee831f5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -532,7 +532,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rx_opt.ts_recent_stamp = 0;
newtp->tcp_header_len = sizeof(struct tcphdr);
}
- newtp->tsoffset = 0;
+ newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
if (newtp->af_specific->md5_lookup(sk, newsk))
@@ -581,6 +581,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
+ if (tmp_opt.rcv_tsecr)
+ tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
/* We do not store true stamp, but it is not required,
* it can be estimated (approximately)
* from other data.
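The ts_off plumbing above gives each connection a private timestamp offset: the generator that produces the initial sequence number also yields ts_off, tcp_synack_options() adds it to every tsval sent on the wire, and this receive path subtracts it from rcv_tsecr before any RTT math. A sketch of that round trip, under assumed helper names:

```c
#include <stdio.h>
#include <stdint.h>

/* Hypothetical helper names; the kernel derives ts_off from the same
 * keyed hash that produces the initial sequence number.
 */
static uint32_t local_clock;	/* stand-in for tcp_time_stamp */

static uint32_t tsval_to_send(uint32_t ts_off)
{
	return local_clock + ts_off;	/* what goes on the wire */
}

static uint32_t tsecr_received(uint32_t echoed, uint32_t ts_off)
{
	return echoed - ts_off;		/* normalize before RTT math */
}

int main(void)
{
	uint32_t ts_off = 0xdeadbeef;	/* per-flow random offset */

	local_clock = 1000;
	uint32_t on_wire = tsval_to_send(ts_off);

	/* The peer echoes our tsval back; 40 ticks elapse locally. */
	local_clock = 1040;
	uint32_t rtt = local_clock - tsecr_received(on_wire, ts_off);

	printf("measured RTT = %u ticks\n", rtt);	/* 40 */
	return 0;
}
```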
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 896e9dfbdb5c..8ce50dc3ab8c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -640,7 +640,7 @@ static unsigned int tcp_synack_options(struct request_sock *req,
}
if (likely(ireq->tstamp_ok)) {
opts->options |= OPTION_TS;
- opts->tsval = tcp_skb_timestamp(skb);
+ opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
opts->tsecr = req->ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
@@ -769,25 +769,27 @@ static void tcp_tasklet_func(unsigned long data)
list_del(&tp->tsq_node);
sk = (struct sock *)tp;
- bh_lock_sock(sk);
-
- if (!sock_owned_by_user(sk)) {
- tcp_tsq_handler(sk);
- } else {
- /* defer the work to tcp_release_cb() */
- set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+ smp_mb__before_atomic();
+ clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
+
+ if (!sk->sk_lock.owned &&
+ test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk)) {
+ clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
+ tcp_tsq_handler(sk);
+ }
+ bh_unlock_sock(sk);
}
- bh_unlock_sock(sk);
- clear_bit(TSQ_QUEUED, &tp->tsq_flags);
sk_free(sk);
}
}
-#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
- (1UL << TCP_WRITE_TIMER_DEFERRED) | \
- (1UL << TCP_DELACK_TIMER_DEFERRED) | \
- (1UL << TCP_MTU_REDUCED_DEFERRED))
+#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
+ TCPF_WRITE_TIMER_DEFERRED | \
+ TCPF_DELACK_TIMER_DEFERRED | \
+ TCPF_MTU_REDUCED_DEFERRED)
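The rewritten macro assumes companion TCPF_*/TSQF_* mask constants, presumably defined on the header side of the series as the shifted forms of the existing bit numbers, so callers can test masks directly instead of spelling out the shifts. A hedged reconstruction of those definitions:

```c
/* Hedged reconstruction of the mask constants this hunk relies on;
 * the exact definitions live in the header side of the series.
 */
enum tsq_enum {
	TSQ_THROTTLED,
	TSQ_QUEUED,
	TCP_TSQ_DEFERRED,
	TCP_WRITE_TIMER_DEFERRED,
	TCP_DELACK_TIMER_DEFERRED,
	TCP_MTU_REDUCED_DEFERRED,
};

enum tsq_flags {
	TSQF_THROTTLED			= (1UL << TSQ_THROTTLED),
	TSQF_QUEUED			= (1UL << TSQ_QUEUED),
	TCPF_TSQ_DEFERRED		= (1UL << TCP_TSQ_DEFERRED),
	TCPF_WRITE_TIMER_DEFERRED	= (1UL << TCP_WRITE_TIMER_DEFERRED),
	TCPF_DELACK_TIMER_DEFERRED	= (1UL << TCP_DELACK_TIMER_DEFERRED),
	TCPF_MTU_REDUCED_DEFERRED	= (1UL << TCP_MTU_REDUCED_DEFERRED),
};
```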
/**
* tcp_release_cb - tcp release_sock() callback
* @sk: socket
@@ -797,18 +799,17 @@ static void tcp_tasklet_func(unsigned long data)
*/
void tcp_release_cb(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
unsigned long flags, nflags;
/* perform an atomic operation only if at least one flag is set */
do {
- flags = tp->tsq_flags;
+ flags = sk->sk_tsq_flags;
if (!(flags & TCP_DEFERRED_ALL))
return;
nflags = flags & ~TCP_DEFERRED_ALL;
- } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
+ } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
- if (flags & (1UL << TCP_TSQ_DEFERRED))
+ if (flags & TCPF_TSQ_DEFERRED)
tcp_tsq_handler(sk);
/* Here begins the tricky part :
@@ -822,15 +823,15 @@ void tcp_release_cb(struct sock *sk)
*/
sock_release_ownership(sk);
- if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
+ if (flags & TCPF_WRITE_TIMER_DEFERRED) {
tcp_write_timer_handler(sk);
__sock_put(sk);
}
- if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
+ if (flags & TCPF_DELACK_TIMER_DEFERRED) {
tcp_delack_timer_handler(sk);
__sock_put(sk);
}
- if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
+ if (flags & TCPF_MTU_REDUCED_DEFERRED) {
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
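tcp_release_cb() now snapshots and clears all deferred bits in a single cmpxchg loop over sk->sk_tsq_flags; a bit set concurrently after the snapshot survives for the next release pass. A userspace analogue of that snapshot-and-clear idiom using C11 atomics (the flag values are illustrative):

```c
#include <stdatomic.h>
#include <stdio.h>

#define DEFERRED_ALL 0x0fUL	/* illustrative mask of deferred bits */

/* Snapshot-and-clear: returns the deferred bits we now own, leaving
 * any other bits — and bits set after our snapshot — untouched.
 */
static unsigned long take_deferred(atomic_ulong *flags)
{
	unsigned long old, new;

	old = atomic_load(flags);
	do {
		if (!(old & DEFERRED_ALL))
			return 0;
		new = old & ~DEFERRED_ALL;
		/* On failure, 'old' is reloaded and we retry. */
	} while (!atomic_compare_exchange_weak(flags, &old, new));

	return old & DEFERRED_ALL;
}

int main(void)
{
	atomic_ulong flags = 0x05;	/* two deferred events pending */

	printf("claimed 0x%lx, left 0x%lx\n",
	       take_deferred(&flags), (unsigned long)atomic_load(&flags));
	return 0;
}
```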
@@ -860,6 +861,7 @@ void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long flags, nval, oval;
int wmem;
/* Keep one reference on sk_wmem_alloc.
@@ -877,16 +879,25 @@ void tcp_wfree(struct sk_buff *skb)
if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
goto out;
- if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
- !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
- unsigned long flags;
+ for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
struct tsq_tasklet *tsq;
+ bool empty;
+
+ if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
+ goto out;
+
+ nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+ nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+ if (nval != oval)
+ continue;
/* queue this socket to tasklet queue */
local_irq_save(flags);
tsq = this_cpu_ptr(&tsq_tasklet);
+ empty = list_empty(&tsq->head);
list_add(&tp->tsq_node, &tsq->head);
- tasklet_schedule(&tsq->tasklet);
+ if (empty)
+ tasklet_schedule(&tsq->tasklet);
local_irq_restore(flags);
return;
}
@@ -1027,7 +1038,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
/* Our usage of tstamp should remain private */
- skb->tstamp.tv64 = 0;
+ skb->tstamp = 0;
/* Cleanup our debris for IP stacks */
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
@@ -1514,6 +1525,18 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (sysctl_tcp_slow_start_after_idle &&
(s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
tcp_cwnd_application_limited(sk);
+
+ /* The following conditions together indicate that the starvation
+ * is caused by an insufficient sender buffer:
+ * 1) just sent some data (see tcp_write_xmit)
+ * 2) not cwnd limited (this else condition)
+ * 3) no more data to send (null tcp_send_head)
+ * 4) application is hitting buffer limit (SOCK_NOSPACE)
+ */
+ if (!tcp_send_head(sk) && sk->sk_socket &&
+ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
+ (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+ tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
}
@@ -1910,26 +1933,26 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
*/
static int tcp_mtu_probe(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb, *nskb, *next;
struct net *net = sock_net(sk);
- int len;
int probe_size;
int size_needed;
- int copy;
+ int copy, len;
int mss_now;
int interval;
/* Not currently probing/verifying,
* not in recovery,
* have enough cwnd, and
- * not SACKing (the variable headers throw things off) */
- if (!icsk->icsk_mtup.enabled ||
- icsk->icsk_mtup.probe_size ||
- inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
- tp->snd_cwnd < 11 ||
- tp->rx_opt.num_sacks || tp->rx_opt.dsack)
+ * not SACKing (the variable headers throw things off)
+ */
+ if (likely(!icsk->icsk_mtup.enabled ||
+ icsk->icsk_mtup.probe_size ||
+ inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
+ tp->snd_cwnd < 11 ||
+ tp->rx_opt.num_sacks || tp->rx_opt.dsack))
return -1;
/* Use binary search for probe_size between tcp_mss_base,
@@ -2069,7 +2092,16 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit <<= factor;
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
- set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+ /* Always send the 1st or 2nd skb in write queue.
+ * No need to wait for TX completion to call us back,
+ * after softirq/tasklet schedule.
+ * This helps when TX completions are delayed too much.
+ */
+ if (skb == sk->sk_write_queue.next ||
+ skb->prev == sk->sk_write_queue.next)
+ return false;
+
+ set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED, so we must
* test again the condition.
@@ -2081,6 +2113,47 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
return false;
}
+static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
+{
+ const u32 now = tcp_time_stamp;
+
+ if (tp->chrono_type > TCP_CHRONO_UNSPEC)
+ tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+ tp->chrono_start = now;
+ tp->chrono_type = new;
+}
+
+void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* If there are multiple conditions worthy of tracking in a
+ * chronograph then the highest priority enum takes precedence
+ * over the other conditions, so if something "more interesting"
+ * starts happening, stop the previous chrono and start a new one.
+ */
+ if (type > tp->chrono_type)
+ tcp_chrono_set(tp, type);
+}
+
+void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* There are multiple conditions worthy of tracking in a
+ * chronograph, so that the highest priority enum takes
+ * precedence over the other conditions (see tcp_chrono_start).
+ * If a condition stops, we only stop chrono tracking if
+ * it's the "most interesting" (i.e. current) chrono we are
+ * tracking, and we fall back to the busy chrono if data is still pending.
+ */
+ if (tcp_write_queue_empty(sk))
+ tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
+ else if (type == tp->chrono_type)
+ tcp_chrono_set(tp, TCP_CHRONO_BUSY);
+}
+
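The chrono helpers close the currently running counter before switching: tcp_chrono_set() charges the elapsed time to chrono_stat[], start() only upgrades to a higher-priority type, and stop() falls back to BUSY while data remains queued. A compact simulation of those transitions, under an assumed enum ordering:

```c
#include <stdio.h>

/* Assumed to mirror enum tcp_chrono ordering: higher value means
 * higher priority; UNSPEC means "nothing tracked".
 */
enum chrono { UNSPEC, BUSY, RWND_LIMITED, SNDBUF_LIMITED, CHRONO_MAX };

static unsigned int now;		/* fake tcp_time_stamp */
static unsigned int chrono_stat[CHRONO_MAX - 1];
static enum chrono cur = UNSPEC;
static unsigned int started;

static void chrono_set(enum chrono new)
{
	if (cur > UNSPEC)
		chrono_stat[cur - 1] += now - started;	/* close running one */
	started = now;
	cur = new;
}

static void chrono_start(enum chrono type)
{
	if (type > cur)			/* only "more interesting" wins */
		chrono_set(type);
}

static void chrono_stop(enum chrono type, int queue_empty)
{
	if (queue_empty)
		chrono_set(UNSPEC);
	else if (type == cur)
		chrono_set(BUSY);	/* still have data: stay busy */
}

int main(void)
{
	chrono_start(BUSY);			/* t=0: sending */
	now = 10; chrono_start(RWND_LIMITED);	/* receiver window closed */
	now = 25; chrono_stop(RWND_LIMITED, 0);	/* window opened, data left */
	now = 30; chrono_stop(BUSY, 1);		/* queue drained */

	/* busy = 10 + 5 = 15 ticks, rwnd-limited = 15 ticks */
	printf("busy=%u rwnd=%u\n",
	       chrono_stat[BUSY - 1], chrono_stat[RWND_LIMITED - 1]);
	return 0;
}
```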
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
@@ -2103,7 +2176,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
unsigned int tso_segs, sent_pkts;
int cwnd_quota;
int result;
- bool is_cwnd_limited = false;
+ bool is_cwnd_limited = false, is_rwnd_limited = false;
u32 max_segs;
sent_pkts = 0;
@@ -2140,8 +2213,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
- if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+ if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
+ is_rwnd_limited = true;
break;
+ }
if (tso_segs == 1) {
if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
@@ -2167,6 +2242,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
break;
+ if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+ clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
if (tcp_small_queue_check(sk, skb, 0))
break;
@@ -2186,6 +2263,11 @@ repair:
break;
}
+ if (is_rwnd_limited)
+ tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
+ else
+ tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
+
if (likely(sent_pkts)) {
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += sent_pkts;
@@ -2436,9 +2518,11 @@ u32 __tcp_select_window(struct sock *sk)
int full_space = min_t(int, tp->window_clamp, allowed_space);
int window;
- if (mss > full_space)
+ if (unlikely(mss > full_space)) {
mss = full_space;
-
+ if (mss <= 0)
+ return 0;
+ }
if (free_space < (full_space >> 1)) {
icsk->icsk_ack.quick = 0;
@@ -2514,7 +2598,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
}
/* Collapses two adjacent SKB's during retransmission. */
-static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
+static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
@@ -2525,13 +2609,17 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
+ if (next_skb_size) {
+ if (next_skb_size <= skb_availroom(skb))
+ skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
+ next_skb_size);
+ else if (!skb_shift(skb, next_skb, next_skb_size))
+ return false;
+ }
tcp_highest_sack_combine(sk, next_skb, skb);
tcp_unlink_write_queue(next_skb, sk);
- skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
- next_skb_size);
-
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -2560,6 +2648,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
tcp_skb_collapse_tstamp(skb, next_skb);
sk_wmem_free_skb(sk, next_skb);
+ return true;
}
/* Check if coalescing SKBs is legal. */
@@ -2567,14 +2656,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
{
if (tcp_skb_pcount(skb) > 1)
return false;
- /* TODO: SACK collapsing could be used to remove this condition */
- if (skb_shinfo(skb)->nr_frags != 0)
- return false;
if (skb_cloned(skb))
return false;
if (skb == tcp_send_head(sk))
return false;
- /* Some heurestics for collapsing over SACK'd could be invented */
+ /* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false;
@@ -2612,16 +2698,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (space < 0)
break;
- /* Punt if not enough space exists in the first SKB for
- * the data in the second
- */
- if (skb->len > skb_availroom(to))
- break;
if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
break;
- tcp_collapse_retrans(sk, to);
+ if (!tcp_collapse_retrans(sk, to))
+ break;
}
}
@@ -3123,7 +3205,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#endif
/* Do not fool tcpdump (if any), clean our debris */
- skb->tstamp.tv64 = 0;
+ skb->tstamp = 0;
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
@@ -3300,6 +3382,8 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
fo->copied = space;
tcp_connect_queue_skb(sk, syn_data);
+ if (syn_data->len)
+ tcp_chrono_start(sk, TCP_CHRONO_BUSY);
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
@@ -3464,8 +3548,6 @@ void tcp_send_ack(struct sock *sk)
/* We do not want pure acks influencing TCP Small Queues or fq/pacing
* too much.
* SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
- * We also avoid tcp_wfree() overhead (cache line miss accessing
- * tp->tsq_flags) by using regular sock_wfree()
*/
skb_set_tcp_pure_ack(buff);
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index bf5ea9e9bbc1..f2123075ce6e 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,6 +15,10 @@
#define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3
+struct scalable {
+ u32 loss_cwnd;
+};
+
static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -32,12 +36,23 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
+ struct scalable *ca = inet_csk_ca(sk);
+
+ ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
}
+static u32 tcp_scalable_cwnd_undo(struct sock *sk)
+{
+ const struct scalable *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
.ssthresh = tcp_scalable_ssthresh,
+ .undo_cwnd = tcp_scalable_cwnd_undo,
.cong_avoid = tcp_scalable_cong_avoid,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3ea1cf804748..3705075f42c3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -310,7 +310,7 @@ static void tcp_delack_timer(unsigned long data)
inet_csk(sk)->icsk_ack.blocked = 1;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* delegate our work to tcp_release_cb() */
- if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
}
bh_unlock_sock(sk);
@@ -592,7 +592,7 @@ static void tcp_write_timer(unsigned long data)
tcp_write_timer_handler(sk);
} else {
/* delegate our work to tcp_release_cb() */
- if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
}
bh_unlock_sock(sk);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 4c4bac1b5eab..218cfcc77650 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -307,6 +307,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
static struct tcp_congestion_ops tcp_vegas __read_mostly = {
.init = tcp_vegas_init,
.ssthresh = tcp_reno_ssthresh,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_vegas_cong_avoid,
.pkts_acked = tcp_vegas_pkts_acked,
.set_state = tcp_vegas_state,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 40171e163cff..76005d4b8dfc 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -30,6 +30,7 @@ struct veno {
u32 basertt; /* the min of all Veno rtt measurements seen (in usec) */
u32 inc; /* decide whether to increase cwnd */
u32 diff; /* calculate the diff rate */
+ u32 loss_cwnd; /* cwnd when loss occurred */
};
/* There are several situations when we must "re-start" Veno:
@@ -193,6 +194,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
struct veno *veno = inet_csk_ca(sk);
+ veno->loss_cwnd = tp->snd_cwnd;
if (veno->diff < beta)
/* in "non-congestive state", cut cwnd by 1/5 */
return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -201,9 +203,17 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
return max(tp->snd_cwnd >> 1U, 2U);
}
+static u32 tcp_veno_cwnd_undo(struct sock *sk)
+{
+ const struct veno *veno = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
+}
+
static struct tcp_congestion_ops tcp_veno __read_mostly = {
.init = tcp_veno_init,
.ssthresh = tcp_veno_ssthresh,
+ .undo_cwnd = tcp_veno_cwnd_undo,
.cong_avoid = tcp_veno_cong_avoid,
.pkts_acked = tcp_veno_pkts_acked,
.set_state = tcp_veno_state,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4b03a2e2a050..fed66dc0e0f5 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -278,6 +278,7 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
.init = tcp_westwood_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cwnd_event = tcp_westwood_event,
.in_ack_event = tcp_westwood_ack,
.get_info = tcp_westwood_info,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 9c5fc973267f..e6ff99c4bd3b 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -37,6 +37,7 @@ struct yeah {
u32 fast_count;
u32 pkts_acked;
+ u32 loss_cwnd;
};
static void tcp_yeah_init(struct sock *sk)
@@ -219,13 +220,22 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
+ yeah->loss_cwnd = tp->snd_cwnd;
return max_t(int, tp->snd_cwnd - reduction, 2);
}
+static u32 tcp_yeah_cwnd_undo(struct sock *sk)
+{
+ const struct yeah *yeah = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
+}
+
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
.init = tcp_yeah_init,
.ssthresh = tcp_yeah_ssthresh,
+ .undo_cwnd = tcp_yeah_cwnd_undo,
.cong_avoid = tcp_yeah_cong_avoid,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5bab6c3f7a2f..1307a7c2e544 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -79,7 +79,7 @@
#define pr_fmt(fmt) "UDP: " fmt
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
@@ -580,7 +580,8 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
- IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
+ IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
@@ -1019,7 +1020,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
flow_flags,
- faddr, saddr, dport, inet->inet_sport);
+ faddr, saddr, dport, inet->inet_sport,
+ sk->sk_uid);
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
@@ -1172,6 +1174,181 @@ out:
return ret;
}
+/* fully reclaim rmem/fwd memory allocated for skb */
+static void udp_rmem_release(struct sock *sk, int size, int partial)
+{
+ struct udp_sock *up = udp_sk(sk);
+ int amt;
+
+ if (likely(partial)) {
+ up->forward_deficit += size;
+ size = up->forward_deficit;
+ if (size < (sk->sk_rcvbuf >> 2) &&
+ !skb_queue_empty(&sk->sk_receive_queue))
+ return;
+ } else {
+ size += up->forward_deficit;
+ }
+ up->forward_deficit = 0;
+
+ sk->sk_forward_alloc += size;
+ amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
+ sk->sk_forward_alloc -= amt;
+
+ if (amt)
+ __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
+
+ atomic_sub(size, &sk->sk_rmem_alloc);
+}
+
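udp_rmem_release() batches reclaim: partial releases accumulate in forward_deficit until they cover a quarter of sk_rcvbuf, and the amount handed back to the global accounting is rounded down to SK_MEM_QUANTUM, with `partial` keeping a sub-quantum remainder cached on the socket. A worked sketch of that rounding (the quantum value is an assumption):

```c
#include <stdio.h>

#define SK_MEM_QUANTUM 4096	/* typically one page (assumed here) */

/* Round the forward-allocated pool down to a quantum multiple; with
 * partial=1 a sub-quantum remainder stays cached on the socket, as in
 * udp_rmem_release() above.
 */
static int reclaimable(int fwd_alloc, int partial)
{
	return (fwd_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
}

int main(void)
{
	int fwd = 2 * SK_MEM_QUANTUM;	/* pool holds exactly two pages */

	printf("partial release: return %d, keep %d cached\n",
	       reclaimable(fwd, 1), fwd - reclaimable(fwd, 1));
	printf("final release:   return %d, keep %d cached\n",
	       reclaimable(fwd, 0), fwd - reclaimable(fwd, 0));
	return 0;
}
```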
+/* Note: called with sk_receive_queue.lock held.
+ * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
+ * This avoids a cache line miss while receive_queue lock is held.
+ * Look at __udp_enqueue_schedule_skb() to find where this copy is done.
+ */
+void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
+{
+ udp_rmem_release(sk, skb->dev_scratch, 1);
+}
+EXPORT_SYMBOL(udp_skb_destructor);
+
+/* The idea of busylocks is to let producers grab an extra spinlock
+ * to relieve pressure on the receive_queue spinlock shared by the consumer.
+ * Under flood, this means that only one producer can be in line
+ * trying to acquire the receive_queue spinlock.
+ * These busylocks can be allocated per cpu, instead of per
+ * socket (which would consume a cache line per socket).
+ */
+static int udp_busylocks_log __read_mostly;
+static spinlock_t *udp_busylocks __read_mostly;
+
+static spinlock_t *busylock_acquire(void *ptr)
+{
+ spinlock_t *busy;
+
+ busy = udp_busylocks + hash_ptr(ptr, udp_busylocks_log);
+ spin_lock(busy);
+ return busy;
+}
+
+static void busylock_release(spinlock_t *busy)
+{
+ if (busy)
+ spin_unlock(busy);
+}
+
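busylock_acquire() hashes the socket pointer into a small power-of-two array of spinlocks, so under flood at most one producer per socket queues up for the real receive_queue lock. A portable sketch of the same hashed-lock idea using pthreads — the hash function is a stand-in for the kernel's hash_ptr():

```c
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define BUSYLOCKS_LOG 4			/* 16 locks, illustrative */
#define N_BUSYLOCKS (1U << BUSYLOCKS_LOG)

static pthread_mutex_t busylocks[N_BUSYLOCKS];

/* Cheap pointer hash (stand-in for the kernel's hash_ptr()). */
static unsigned int hash_ptr(const void *p)
{
	return ((uint32_t)(uintptr_t)p * 0x9E3779B9u)
		>> (32 - BUSYLOCKS_LOG);
}

static pthread_mutex_t *busylock_acquire(void *sk)
{
	pthread_mutex_t *busy = &busylocks[hash_ptr(sk)];

	pthread_mutex_lock(busy);
	return busy;
}

static void busylock_release(pthread_mutex_t *busy)
{
	if (busy)
		pthread_mutex_unlock(busy);
}

int main(void)
{
	int sk;				/* fake socket identity */
	pthread_mutex_t *busy;

	for (unsigned int i = 0; i < N_BUSYLOCKS; i++)
		pthread_mutex_init(&busylocks[i], NULL);

	busy = busylock_acquire(&sk);	/* serialize producers for &sk */
	/* ... enqueue under the real receive-queue lock here ... */
	busylock_release(busy);

	printf("&sk hashed to lock %u\n", hash_ptr(&sk));
	return 0;
}
```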
+int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct sk_buff_head *list = &sk->sk_receive_queue;
+ int rmem, delta, amt, err = -ENOMEM;
+ spinlock_t *busy = NULL;
+ int size;
+
+ /* try to avoid the costly atomic add/sub pair when the receive
+ * queue is full; always allow at least a packet
+ */
+ rmem = atomic_read(&sk->sk_rmem_alloc);
+ if (rmem > sk->sk_rcvbuf)
+ goto drop;
+
+ /* Under memory pressure, it can help udp_recvmsg() to receive
+ * linear skbs:
+ * - Reduce memory overhead and thus increase receive queue capacity
+ * - Less cache line misses at copyout() time
+ * - Less work at consume_skb() (less alien page frag freeing)
+ */
+ if (rmem > (sk->sk_rcvbuf >> 1)) {
+ skb_condense(skb);
+
+ busy = busylock_acquire(sk);
+ }
+ size = skb->truesize;
+ /* Copy skb->truesize into skb->dev_scratch to avoid a cache line miss
+ * in udp_skb_destructor()
+ */
+ skb->dev_scratch = size;
+
+ /* we drop only if the receive buf is full and the receive
+ * queue contains some other skb
+ */
+ rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
+ if (rmem > (size + sk->sk_rcvbuf))
+ goto uncharge_drop;
+
+ spin_lock(&list->lock);
+ if (size >= sk->sk_forward_alloc) {
+ amt = sk_mem_pages(size);
+ delta = amt << SK_MEM_QUANTUM_SHIFT;
+ if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
+ err = -ENOBUFS;
+ spin_unlock(&list->lock);
+ goto uncharge_drop;
+ }
+
+ sk->sk_forward_alloc += delta;
+ }
+
+ sk->sk_forward_alloc -= size;
+
+ /* no need to setup a destructor, we will explicitly release the
+ * forward allocated memory on dequeue
+ */
+ sock_skb_set_dropcount(sk, skb);
+
+ __skb_queue_tail(list, skb);
+ spin_unlock(&list->lock);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_data_ready(sk);
+
+ busylock_release(busy);
+ return 0;
+
+uncharge_drop:
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+
+drop:
+ atomic_inc(&sk->sk_drops);
+ busylock_release(busy);
+ return err;
+}
+EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
+
+void udp_destruct_sock(struct sock *sk)
+{
+ /* reclaim completely the forward allocated memory */
+ unsigned int total = 0;
+ struct sk_buff *skb;
+
+ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ total += skb->truesize;
+ kfree_skb(skb);
+ }
+ udp_rmem_release(sk, total, 0);
+
+ inet_sock_destruct(sk);
+}
+EXPORT_SYMBOL_GPL(udp_destruct_sock);
+
+int udp_init_sock(struct sock *sk)
+{
+ sk->sk_destruct = udp_destruct_sock;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(udp_init_sock);
+
+void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
+{
+ if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
+ bool slow = lock_sock_fast(sk);
+
+ sk_peek_offset_bwd(sk, len);
+ unlock_sock_fast(sk, slow);
+ }
+ consume_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_consume_udp);
+
/**
* first_packet_length - return length of first packet in receive queue
* @sk: socket
@@ -1181,12 +1358,11 @@ out:
*/
static int first_packet_length(struct sock *sk)
{
- struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+ struct sk_buff_head *rcvq = &sk->sk_receive_queue;
struct sk_buff *skb;
+ int total = 0;
int res;
- __skb_queue_head_init(&list_kill);
-
spin_lock_bh(&rcvq->lock);
while ((skb = skb_peek(rcvq)) != NULL &&
udp_lib_checksum_complete(skb)) {
@@ -1196,18 +1372,13 @@ static int first_packet_length(struct sock *sk)
IS_UDPLITE(sk));
atomic_inc(&sk->sk_drops);
__skb_unlink(skb, rcvq);
- __skb_queue_tail(&list_kill, skb);
+ total += skb->truesize;
+ kfree_skb(skb);
}
res = skb ? skb->len : -1;
+ if (total)
+ udp_rmem_release(sk, total, 1);
spin_unlock_bh(&rcvq->lock);
-
- if (!skb_queue_empty(&list_kill)) {
- bool slow = lock_sock_fast(sk);
-
- __skb_queue_purge(&list_kill);
- sk_mem_reclaim_partial(sk);
- unlock_sock_fast(sk, slow);
- }
return res;
}
@@ -1256,15 +1427,13 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int err;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
- bool slow;
if (flags & MSG_ERRQUEUE)
return ip_recv_error(sk, msg, len, addr_len);
try_again:
peeking = off = sk_peek_offset(sk, flags);
- skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
- &peeked, &off, &err);
+ skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
if (!skb)
return err;
@@ -1281,7 +1450,8 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy.
*/
- if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
+ if (copied < ulen || peeking ||
+ (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid)
goto csum_copy_err;
@@ -1297,13 +1467,12 @@ try_again:
}
if (unlikely(err)) {
- trace_kfree_skb(skb, udp_recvmsg);
if (!peeked) {
atomic_inc(&sk->sk_drops);
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
}
- skb_free_datagram_locked(sk, skb);
+ kfree_skb(skb);
return err;
}
@@ -1322,22 +1491,21 @@ try_again:
*addr_len = sizeof(*sin);
}
if (inet->cmsg_flags)
- ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off);
+ ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
err = copied;
if (flags & MSG_TRUNC)
err = ulen;
- __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
+ skb_consume_udp(sk, skb, peeking ? -err : err);
return err;
csum_copy_err:
- slow = lock_sock_fast(sk);
- if (!skb_kill_datagram(sk, skb, flags)) {
+ if (!__sk_queue_drop_skb(sk, skb, flags)) {
UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
}
- unlock_sock_fast(sk, slow);
+ kfree_skb(skb);
/* starting over for a new packet, but check if we need to yield */
cond_resched();
@@ -1463,9 +1631,11 @@ int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
sk_incoming_cpu_update(sk);
+ } else {
+ sk_mark_napi_id_once(sk, skb);
}
- rc = __sock_queue_rcv_skb(sk, skb);
+ rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -1480,7 +1650,6 @@ int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
return 0;
-
}
static struct static_key udp_encap_needed __read_mostly;
@@ -1502,7 +1671,6 @@ EXPORT_SYMBOL(udp_encap_enable);
int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
- int rc;
int is_udplite = IS_UDPLITE(sk);
/*
@@ -1589,25 +1757,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto drop;
udp_csum_pull_header(skb);
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- is_udplite);
- goto drop;
- }
-
- rc = 0;
ipv4_pktinfo_prepare(sk, skb);
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk))
- rc = __udp_queue_rcv_skb(sk, skb);
- else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
- bh_unlock_sock(sk);
- goto drop;
- }
- bh_unlock_sock(sk);
-
- return rc;
+ return __udp_queue_rcv_skb(sk, skb);
csum_error:
__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
@@ -2217,13 +2369,13 @@ struct proto udp_prot = {
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .init = udp_init_sock,
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
.getsockopt = udp_getsockopt,
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
- .backlog_rcv = __udp_queue_rcv_skb,
.release_cb = ip4_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
@@ -2512,6 +2664,7 @@ EXPORT_SYMBOL(udp_flow_hashrnd);
void __init udp_init(void)
{
unsigned long limit;
+ unsigned int i;
udp_table_init(&udp_table, "UDP");
limit = nr_free_buffer_pages() / 8;
@@ -2522,4 +2675,13 @@ void __init udp_init(void)
sysctl_udp_rmem_min = SK_MEM_QUANTUM;
sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+ /* 16 spinlocks per cpu */
+ udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
+ udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
+ GFP_KERNEL);
+ if (!udp_busylocks)
+ panic("UDP: failed to alloc udp_busylocks\n");
+ for (i = 0; i < (1U << udp_busylocks_log); i++)
+ spin_lock_init(udp_busylocks + i);
}
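The sizing in udp_init() works out to roughly 16 busylocks per possible CPU, rounded to a power of two: udp_busylocks_log = ilog2(nr_cpu_ids) + 4. A quick userspace check of that arithmetic:

```c
#include <stdio.h>

/* ilog2() equivalent: index of the highest set bit. */
static unsigned int ilog2(unsigned int v)
{
	unsigned int l = 0;

	while (v >>= 1)
		l++;
	return l;
}

int main(void)
{
	/* "16 spinlocks per cpu": log2(ncpus) plus 4 extra doublings. */
	for (unsigned int ncpus = 1; ncpus <= 64; ncpus <<= 2) {
		unsigned int log = ilog2(ncpus) + 4;

		printf("%2u cpus -> %u busylocks\n", ncpus, 1U << log);
	}
	return 0;
}
```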
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index ff450c2aad9b..59f10fe9782e 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -50,10 +50,11 @@ struct proto udplite_prot = {
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
- .backlog_rcv = __udp_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
.obj_size = sizeof(struct udp_sock),
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 2343e4f2e0bf..ec1267e2bd1f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -289,4 +289,39 @@ config IPV6_PIMSM_V2
Support for IPv6 PIM multicast routing protocol PIM-SMv2.
If unsure, say N.
+config IPV6_SEG6_LWTUNNEL
+ bool "IPv6: Segment Routing Header encapsulation support"
+ depends on IPV6
+ select LWTUNNEL
+ ---help---
+ Support for encapsulation of packets within an outer IPv6
+ header and a Segment Routing Header using the lightweight
+ tunnels mechanism.
+
+ If unsure, say N.
+
+config IPV6_SEG6_INLINE
+ bool "IPv6: direct Segment Routing Header insertion "
+ depends on IPV6_SEG6_LWTUNNEL
+ ---help---
+ Support for direct insertion of the Segment Routing Header,
+ also known as inline mode. Be aware that direct insertion of
+ extension headers (as opposed to encapsulation) may break
+ multiple mechanisms such as PMTUD or IPSec AH. Use this feature
+ only if you know exactly what you are doing.
+
+ If unsure, say N.
+
+config IPV6_SEG6_HMAC
+ bool "IPv6: Segment Routing HMAC support"
+ depends on IPV6
+ select CRYPTO_HMAC
+ select CRYPTO_SHA1
+ select CRYPTO_SHA256
+ ---help---
+ Support for HMAC signature generation and verification
+ of SR-enabled packets.
+
+ If unsure, say N.
+
endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index c174ccb340a1..a9e9fec387ce 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o
+ udp_offload.o seg6.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -23,6 +23,8 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
ipv6-$(CONFIG_NETLABEL) += calipso.o
+ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o
+ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4bc5ba3ae452..f60e88e56255 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -238,6 +238,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.use_oif_addrs_only = 0,
.ignore_routes_with_linkdown = 0,
.keep_addr_on_down = 0,
+ .seg6_enabled = 0,
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ .seg6_require_hmac = 0,
+#endif
+ .enhanced_dad = 1,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -284,6 +289,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.use_oif_addrs_only = 0,
.ignore_routes_with_linkdown = 0,
.keep_addr_on_down = 0,
+ .seg6_enabled = 0,
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ .seg6_require_hmac = 0,
+#endif
+ .enhanced_dad = 1,
};
/* Check if a valid qdisc is available */
@@ -3727,12 +3737,21 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
{
unsigned long rand_num;
struct inet6_dev *idev = ifp->idev;
+ u64 nonce;
if (ifp->flags & IFA_F_OPTIMISTIC)
rand_num = 0;
else
rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
+ nonce = 0;
+ if (idev->cnf.enhanced_dad ||
+ dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) {
+ do
+ get_random_bytes(&nonce, 6);
+ while (nonce == 0);
+ }
+ ifp->dad_nonce = nonce;
ifp->dad_probes = idev->cnf.dad_transmits;
addrconf_mod_dad_work(ifp, rand_num);
}
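Enhanced DAD draws one random, nonzero 48-bit nonce per probe session and carries it in the NS nonce option, so a node can recognize its own looped-back solicitations. A userspace sketch of the draw loop — rand() stands in for a real entropy source:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <time.h>

/* Draw a 48-bit nonce, retrying until nonzero, like the
 * get_random_bytes(&nonce, 6) loop in addrconf_dad_kick().
 */
static uint64_t dad_nonce(void)
{
	uint64_t nonce = 0;

	do {
		unsigned char b[6];

		for (int i = 0; i < 6; i++)
			b[i] = rand() & 0xff;
		nonce = 0;
		memcpy(&nonce, b, 6);	/* copy 6 random bytes in */
	} while (nonce == 0);

	return nonce;
}

int main(void)
{
	srand((unsigned int)time(NULL));
	printf("DAD nonce: 0x%012llx\n", (unsigned long long)dad_nonce());
	return 0;
}
```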
@@ -3910,7 +3929,8 @@ static void addrconf_dad_work(struct work_struct *w)
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any);
+ ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any,
+ ifp->dad_nonce);
out:
in6_ifa_put(ifp);
rtnl_unlock();
@@ -4950,6 +4970,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
+ array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
+#endif
+ array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
}
static inline size_t inet6_ifla6_size(void)
@@ -5515,8 +5540,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
@@ -5525,7 +5549,6 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
dev_disable_change(idev);
}
}
- rcu_read_unlock();
}
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
@@ -6042,6 +6065,29 @@ static const struct ctl_table addrconf_sysctl[] = {
},
{
+ .procname = "seg6_enabled",
+ .data = &ipv6_devconf.seg6_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ {
+ .procname = "seg6_require_hmac",
+ .data = &ipv6_devconf.seg6_require_hmac,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "enhanced_dad",
+ .data = &ipv6_devconf.enhanced_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 46ad699937fd..aa42123bc301 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -61,8 +61,9 @@
#include <net/ip6_tunnel.h>
#endif
#include <net/calipso.h>
+#include <net/seg6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/mroute6.h>
#include "ip6_offload.h"
@@ -257,6 +258,14 @@ lookup_protocol:
goto out;
}
}
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
out:
return err;
out_rcu_unlock:
@@ -678,6 +687,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
+ fl6.flowi6_uid = sk->sk_uid;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
rcu_read_lock();
@@ -990,6 +1000,10 @@ static int __init inet6_init(void)
if (err)
goto calipso_fail;
+ err = seg6_init();
+ if (err)
+ goto seg6_fail;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -1000,8 +1014,10 @@ out:
#ifdef CONFIG_SYSCTL
sysctl_fail:
- calipso_exit();
+ seg6_exit();
#endif
+seg6_fail:
+ calipso_exit();
calipso_fail:
pingv6_exit();
pingv6_fail:
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 0630a4d5daaa..189eb10b742d 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
else
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
xfrm_state_put(x);
return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ccf40550c475..a3eaafd87100 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -33,7 +33,7 @@
#include <net/dsfield.h>
#include <linux/errqueue.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static bool ipv6_mapped_addr_any(const struct in6_addr *a)
{
@@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
fl6->fl6_dport = inet->inet_dport;
fl6->fl6_sport = inet->inet_sport;
fl6->flowlabel = np->flow_label;
+ fl6->flowi6_uid = sk->sk_uid;
if (!fl6->flowi6_oif)
fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
@@ -700,7 +701,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
struct sockaddr_in6 sin6;
__be16 *ports = (__be16 *) skb_transport_header(skb);
- if (skb_transport_offset(skb) + 4 <= skb->len) {
+ if (skb_transport_offset(skb) + 4 <= (int)skb->len) {
/* All current transport protocols have the port numbers in the
* first four bytes of the transport header and this function is
* written with this assumption in mind.
@@ -717,6 +718,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
}
}
+ if (np->rxopt.bits.recvfragsize && opt->frag_max_size) {
+ int val = opt->frag_max_size;
+
+ put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val);
+ }
}
void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 111ba55fd512..cbcdd5db31f4 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
else
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
xfrm_state_put(x);
return 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 139ceb68bd37..e4198502fd98 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -47,6 +47,11 @@
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/xfrm.h>
#endif
+#include <linux/seg6.h>
+#include <net/seg6.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
#include <linux/uaccess.h>
@@ -227,7 +232,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
ipv6h->saddr = hao->addr;
hao->addr = tmp_addr;
- if (skb->tstamp.tv64 == 0)
+ if (skb->tstamp == 0)
__net_timestamp(skb);
return true;
@@ -286,6 +291,182 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
return -1;
}
+static void seg6_update_csum(struct sk_buff *skb)
+{
+ struct ipv6_sr_hdr *hdr;
+ struct in6_addr *addr;
+ __be32 from, to;
+
+ /* srh is at transport offset and seg_left is already decremented
+ * but daddr is not yet updated with next segment
+ */
+
+ hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+ addr = hdr->segments + hdr->segments_left;
+
+ hdr->segments_left++;
+ from = *(__be32 *)hdr;
+
+ hdr->segments_left--;
+ to = *(__be32 *)hdr;
+
+ /* update skb csum with diff resulting from seg_left decrement */
+
+ update_csum_diff4(skb, from, to);
+
+ /* compute csum diff between current and next segment and update */
+
+ update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr),
+ (__be32 *)addr);
+}
+
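seg6_update_csum() patches skb->csum with two differences: the segments_left decrement and the daddr swap. The update_csum_diff4/16() helpers are assumed to implement RFC 1624-style incremental checksum updates; the sketch below verifies that arithmetic against a full recompute for the basic 16-bit case:

```c
#include <stdint.h>
#include <stdio.h>

/* RFC 1624 incremental checksum update: given the old 16-bit Internet
 * checksum and a word that changed from 'from' to 'to', compute the
 * new checksum without re-summing the packet.
 */
static uint16_t csum_update(uint16_t csum, uint16_t from, uint16_t to)
{
	uint32_t sum = (uint16_t)~csum;

	sum += (uint16_t)~from;
	sum += to;
	/* fold carries back into the low 16 bits */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* A two-word "packet": full recompute and incremental must agree. */
	uint16_t w0 = 0x1234, w1 = 0xabcd, new_w1 = 0xabcc;
	uint32_t sum = w0 + w1;

	sum = (sum & 0xffff) + (sum >> 16);
	uint16_t csum = (uint16_t)~sum;

	uint32_t sum2 = w0 + new_w1;

	sum2 = (sum2 & 0xffff) + (sum2 >> 16);
	printf("recomputed=0x%04x incremental=0x%04x\n",
	       (uint16_t)~sum2, csum_update(csum, w1, new_w1));
	return 0;
}
```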
+static int ipv6_srh_rcv(struct sk_buff *skb)
+{
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ struct net *net = dev_net(skb->dev);
+ struct ipv6_sr_hdr *hdr;
+ struct inet6_dev *idev;
+ struct in6_addr *addr;
+ bool cleanup = false;
+ int accept_seg6;
+
+ hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+ idev = __in6_dev_get(skb->dev);
+
+ accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
+ if (accept_seg6 > idev->cnf.seg6_enabled)
+ accept_seg6 = idev->cnf.seg6_enabled;
+
+ if (!accept_seg6) {
+ kfree_skb(skb);
+ return -1;
+ }
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (!seg6_hmac_validate_skb(skb)) {
+ kfree_skb(skb);
+ return -1;
+ }
+#endif
+
+looped_back:
+ if (hdr->segments_left > 0) {
+ if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 &&
+ sr_has_cleanup(hdr))
+ cleanup = true;
+ } else {
+ if (hdr->nexthdr == NEXTHDR_IPV6) {
+ int offset = (hdr->hdrlen + 1) << 3;
+
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+
+ if (!pskb_pull(skb, offset)) {
+ kfree_skb(skb);
+ return -1;
+ }
+ skb_postpull_rcsum(skb, skb_transport_header(skb),
+ offset);
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->encapsulation = 0;
+
+ __skb_tunnel_rx(skb, skb->dev, net);
+
+ netif_rx(skb);
+ return -1;
+ }
+
+ opt->srcrt = skb_network_header_len(skb);
+ opt->lastopt = opt->srcrt;
+ skb->transport_header += (hdr->hdrlen + 1) << 3;
+ opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
+
+ return 1;
+ }
+
+ if (hdr->segments_left >= (hdr->hdrlen >> 1)) {
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((&hdr->segments_left) -
+ skb_network_header(skb)));
+ kfree_skb(skb);
+ return -1;
+ }
+
+ if (skb_cloned(skb)) {
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return -1;
+ }
+ }
+
+ hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+ hdr->segments_left--;
+ addr = hdr->segments + hdr->segments_left;
+
+ skb_push(skb, sizeof(struct ipv6hdr));
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ seg6_update_csum(skb);
+
+ ipv6_hdr(skb)->daddr = *addr;
+
+ if (cleanup) {
+ int srhlen = (hdr->hdrlen + 1) << 3;
+ int nh = hdr->nexthdr;
+
+ skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen);
+ memmove(skb_network_header(skb) + srhlen,
+ skb_network_header(skb),
+ (unsigned char *)hdr - skb_network_header(skb));
+ skb->network_header += srhlen;
+ ipv6_hdr(skb)->nexthdr = nh;
+ ipv6_hdr(skb)->payload_len = htons(skb->len -
+ sizeof(struct ipv6hdr));
+ skb_push_rcsum(skb, sizeof(struct ipv6hdr));
+ }
+
+ skb_dst_drop(skb);
+
+ ip6_route_input(skb);
+
+ if (skb_dst(skb)->error) {
+ dst_input(skb);
+ return -1;
+ }
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
+ if (ipv6_hdr(skb)->hop_limit <= 1) {
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+ ICMPV6_EXC_HOPLIMIT, 0);
+ kfree_skb(skb);
+ return -1;
+ }
+ ipv6_hdr(skb)->hop_limit--;
+
+ /* be sure that srh is still present before reinjecting */
+ if (!cleanup) {
+ skb_pull(skb, sizeof(struct ipv6hdr));
+ goto looped_back;
+ }
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+ }
+
+ dst_input(skb);
+
+ return -1;
+}
+
/********************************
Routing header.
********************************/
@@ -326,6 +507,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
return -1;
}
+ /* segment routing */
+ if (hdr->type == IPV6_SRCRT_TYPE_4)
+ return ipv6_srh_rcv(skb);
+
looped_back:
if (hdr->segments_left == 0) {
switch (hdr->type) {
@@ -679,9 +864,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
* for headers.
*/
-static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
- struct ipv6_rt_hdr *opt,
- struct in6_addr **addr_p)
+static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto,
+ struct ipv6_rt_hdr *opt,
+ struct in6_addr **addr_p, struct in6_addr *saddr)
{
struct rt0_hdr *phdr, *ihdr;
int hops;
@@ -704,6 +889,62 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
*proto = NEXTHDR_ROUTING;
}
+static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
+ struct ipv6_rt_hdr *opt,
+ struct in6_addr **addr_p, struct in6_addr *saddr)
+{
+ struct ipv6_sr_hdr *sr_phdr, *sr_ihdr;
+ int plen, hops;
+
+ sr_ihdr = (struct ipv6_sr_hdr *)opt;
+ plen = (sr_ihdr->hdrlen + 1) << 3;
+
+ sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen);
+ memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr));
+
+ hops = sr_ihdr->first_segment + 1;
+ memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1,
+ (hops - 1) * sizeof(struct in6_addr));
+
+ sr_phdr->segments[0] = **addr_p;
+ *addr_p = &sr_ihdr->segments[hops - 1];
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (sr_has_hmac(sr_phdr)) {
+ struct net *net = NULL;
+
+ if (skb->dev)
+ net = dev_net(skb->dev);
+ else if (skb->sk)
+ net = sock_net(skb->sk);
+
+ WARN_ON(!net);
+
+ if (net)
+ seg6_push_hmac(net, saddr, sr_phdr);
+ }
+#endif
+
+ sr_phdr->nexthdr = *proto;
+ *proto = NEXTHDR_ROUTING;
+}
+
+static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
+ struct ipv6_rt_hdr *opt,
+ struct in6_addr **addr_p, struct in6_addr *saddr)
+{
+ switch (opt->type) {
+ case IPV6_SRCRT_TYPE_0:
+ ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
+ break;
+ case IPV6_SRCRT_TYPE_4:
+ ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr);
+ break;
+ default:
+ break;
+ }
+}
+
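Complementing the push path above, here is a userspace sketch of handing a one-intermediate-hop type-4 header to a socket via the sticky IPV6_RTHDR option. segments[0] is left as a placeholder because ipv6_push_rthdr4() overwrites it with the packet's original destination. The exact acceptance rules belong to seg6_validate_srh() (see the ipv6_sockglue.c hunk later in this diff); treating this layout as valid is an assumption.

    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>

    static int attach_srh(int fd, const struct in6_addr *via)
    {
            unsigned char buf[8 + 2 * sizeof(struct in6_addr)] = { 0 };
            struct in6_addr *segs = (struct in6_addr *)(buf + 8);

            buf[1] = sizeof(buf) / 8 - 1;   /* hdrlen, 8-octet units */
            buf[2] = 4;                     /* IPV6_SRCRT_TYPE_4 */
            buf[3] = 1;                     /* segments_left */
            buf[4] = 1;                     /* first_segment */
            segs[1] = *via;                 /* hop to visit first */
            /* segs[0] stays zero: the kernel fills in the final daddr */

            return setsockopt(fd, IPPROTO_IPV6, IPV6_RTHDR, buf, sizeof(buf));
    }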
static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
{
struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
@@ -715,10 +956,10 @@ static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv
void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
u8 *proto,
- struct in6_addr **daddr)
+ struct in6_addr **daddr, struct in6_addr *saddr)
{
if (opt->srcrt) {
- ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
+ ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr);
/*
* IPV6_RTHDRDSTOPTS is ignored
* unless IPV6_RTHDR is set (RFC3542).
@@ -945,7 +1186,22 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
return NULL;
*orig = fl6->daddr;
- fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
+
+ switch (opt->srcrt->type) {
+ case IPV6_SRCRT_TYPE_0:
+ fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
+ break;
+ case IPV6_SRCRT_TYPE_4:
+ {
+ struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt;
+
+ fl6->daddr = srh->segments[srh->first_segment];
+ break;
+ }
+ default:
+ return NULL;
+ }
+
return orig;
}
EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2772004ba5a1..3036f665e6c8 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -70,7 +70,7 @@
#include <net/dsfield.h>
#include <net/l3mdev.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
/*
* The ICMP socket(s). This is the most convenient way to flow control
@@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
else if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
if (!(type & ICMPV6_INFOMSG_MASK))
if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
@@ -486,6 +487,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
+ fl6.flowi6_uid = sock_net_uid(net, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -660,6 +662,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
+ fl6.flowi6_uid = sock_net_uid(net, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index e50c27a93e17..13b5e85fe0d5 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -6,29 +6,88 @@
#include <linux/socket.h>
#include <linux/types.h>
#include <net/checksum.h>
+#include <net/dst_cache.h>
#include <net/ip.h>
#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
#include <net/lwtunnel.h>
#include <net/protocol.h>
#include <uapi/linux/ila.h>
#include "ila.h"
+struct ila_lwt {
+ struct ila_params p;
+ struct dst_cache dst_cache;
+ u32 connected : 1;
+};
+
+static inline struct ila_lwt *ila_lwt_lwtunnel(
+ struct lwtunnel_state *lwt)
+{
+ return (struct ila_lwt *)lwt->data;
+}
+
static inline struct ila_params *ila_params_lwtunnel(
- struct lwtunnel_state *lwstate)
+ struct lwtunnel_state *lwt)
{
- return (struct ila_params *)lwstate->data;
+ return &ila_lwt_lwtunnel(lwt)->p;
}
static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct rt6_info *rt = (struct rt6_info *)orig_dst;
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate);
+ struct dst_entry *dst;
+ int err = -EINVAL;
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true);
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate),
+ true);
- return dst->lwtstate->orig_output(net, sk, skb);
+ if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
+ /* Already have a next hop address in the route; no need for
+ * the dst cache.
+ */
+ return orig_dst->lwtstate->orig_output(net, sk, skb);
+ }
+
+ dst = dst_cache_get(&ilwt->dst_cache);
+ if (unlikely(!dst)) {
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct flowi6 fl6;
+
+ /* Look up a route for the new destination. Take into
+ * account that the base route may already have a gateway.
+ */
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = orig_dst->dev->ifindex;
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
+ fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst,
+ &ip6h->daddr);
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ err = -EHOSTUNREACH;
+ dst_release(dst);
+ goto drop;
+ }
+
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto drop;
+ }
+
+ if (ilwt->connected)
+ dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
+ }
+
+ skb_dst_set(skb, dst);
+ return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
@@ -60,9 +119,9 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts)
{
+ struct ila_lwt *ilwt;
struct ila_params *p;
struct nlattr *tb[ILA_ATTR_MAX + 1];
- size_t encap_len = sizeof(*p);
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
struct ila_addr *iaddr;
@@ -71,7 +130,7 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
if (family != AF_INET6)
return -EINVAL;
- if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) {
+ if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
 /* Need to have the full locator and at least the type field
* included in destination
*/
@@ -95,11 +154,17 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
if (!tb[ILA_ATTR_LOCATOR])
return -EINVAL;
- newts = lwtunnel_state_alloc(encap_len);
+ newts = lwtunnel_state_alloc(sizeof(*ilwt));
if (!newts)
return -ENOMEM;
- newts->len = encap_len;
+ ilwt = ila_lwt_lwtunnel(newts);
+ ret = dst_cache_init(&ilwt->dst_cache, GFP_ATOMIC);
+ if (ret) {
+ kfree(newts);
+ return ret;
+ }
+
p = ila_params_lwtunnel(newts);
p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
@@ -120,11 +185,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
LWTUNNEL_STATE_INPUT_REDIRECT;
+ if (cfg6->fc_dst_len == 8 * sizeof(struct in6_addr))
+ ilwt->connected = 1;
+
*ts = newts;
return 0;
}
+static void ila_destroy_state(struct lwtunnel_state *lwt)
+{
+ dst_cache_destroy(&ila_lwt_lwtunnel(lwt)->dst_cache);
+}
+
static int ila_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
@@ -159,11 +232,13 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
static const struct lwtunnel_encap_ops ila_encap_ops = {
.build_state = ila_build_state,
+ .destroy_state = ila_destroy_state,
.output = ila_output,
.input = ila_input,
.fill_encap = ila_fill_encap_info,
.get_encap_size = ila_encap_nlsize,
.cmp_encap = ila_encap_cmp,
+ .owner = THIS_MODULE,
};
int ila_lwt_init(void)
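The rewritten output path above is the canonical dst_cache pattern for lwtunnel states: initialise the cache when the state is built, consult it on every transmit, populate it only when the state is "connected" (a host route, so the cached dst can never go stale against a different destination), and free it from the new destroy_state hook. A condensed, hedged sketch of that life cycle (error handling trimmed):

    struct my_lwt {
            struct dst_cache cache;         /* per-state route cache */
    };

    static int my_build_state(struct my_lwt *lwt)
    {
            return dst_cache_init(&lwt->cache, GFP_ATOMIC);
    }

    static int my_output(struct net *net, struct sock *sk, struct sk_buff *skb,
                         struct my_lwt *lwt, struct flowi6 *fl6, bool connected)
    {
            struct dst_entry *dst = dst_cache_get(&lwt->cache);

            if (!dst) {
                    dst = ip6_route_output(net, NULL, fl6);   /* slow path */
                    if (connected)
                            dst_cache_set_ip6(&lwt->cache, dst, &fl6->saddr);
            }
            skb_dst_set(skb, dst);
            return dst_output(net, sk, skb);
    }

    static void my_destroy_state(struct my_lwt *lwt)
    {
            dst_cache_destroy(&lwt->cache);
    }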
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e604013dd814..af8f52ee7180 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -118,15 +118,7 @@ static const struct rhashtable_params rht_params = {
.obj_cmpfn = ila_cmpfn,
};
-static struct genl_family ila_nl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = ILA_GENL_NAME,
- .version = ILA_GENL_VERSION,
- .maxattr = ILA_ATTR_MAX,
- .netnsok = true,
- .parallel_ops = true,
-};
+static struct genl_family ila_nl_family;
static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
@@ -482,7 +474,15 @@ static int ila_nl_dump_start(struct netlink_callback *cb)
{
struct net *net = sock_net(cb->skb->sk);
struct ila_net *ilan = net_generic(net, ila_net_id);
- struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+ struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter,
GFP_KERNEL);
@@ -490,16 +490,18 @@ static int ila_nl_dump_start(struct netlink_callback *cb)
static int ila_nl_dump_done(struct netlink_callback *cb)
{
- struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+ struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
rhashtable_walk_exit(&iter->rhiter);
+ kfree(iter);
+
return 0;
}
static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+ struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
struct rhashtable_iter *rhiter = &iter->rhiter;
struct ila_map *ila;
int ret;
@@ -561,6 +563,18 @@ static const struct genl_ops ila_nl_ops[] = {
},
};
+static struct genl_family ila_nl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = ILA_GENL_NAME,
+ .version = ILA_GENL_VERSION,
+ .maxattr = ILA_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .ops = ila_nl_ops,
+ .n_ops = ARRAY_SIZE(ila_nl_ops),
+};
+
#define ILA_HASH_TABLE_SIZE 1024
static __net_init int ila_init_net(struct net *net)
@@ -623,7 +637,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
return 0;
}
-int ila_xlat_init(void)
+int __init ila_xlat_init(void)
{
int ret;
@@ -631,8 +645,7 @@ int ila_xlat_init(void)
if (ret)
goto exit;
- ret = genl_register_family_with_ops(&ila_nl_family,
- ila_nl_ops);
+ ret = genl_register_family(&ila_nl_family);
if (ret < 0)
goto unregister;
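The ila_xlat.c hunks above follow the tree-wide genetlink conversion: the family becomes a static object that carries its ops table and owning module, the id is assigned at registration time (GENL_ID_GENERATE is gone), and registration collapses to a single call. A hedged sketch of the new-style boilerplate, with hypothetical names:

    static const struct genl_ops my_ops[] = {
            /* ...command handlers... */
    };

    static struct genl_family my_family __ro_after_init = {
            .name    = "my_family",
            .version = 1,
            .maxattr = 3,
            .module  = THIS_MODULE,
            .ops     = my_ops,
            .n_ops   = ARRAY_SIZE(my_ops),
    };

    static int __init my_init(void)
    {
            return genl_register_family(&my_family);  /* id chosen here */
    }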
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 532c3ef282c5..75c308239243 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -29,11 +29,12 @@
#include <net/sock_reuseport.h>
int inet6_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax)
+ const struct inet_bind_bucket *tb, bool relax,
+ bool reuseport_ok)
{
const struct sock *sk2;
- int reuse = sk->sk_reuse;
- int reuseport = sk->sk_reuseport;
+ bool reuse = !!sk->sk_reuse;
+ bool reuseport = !!sk->sk_reuseport && reuseport_ok;
kuid_t uid = sock_i_uid((struct sock *)sk);
/* We must walk the whole port owner list in this case. -DaveM */
@@ -88,6 +89,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
fl6->flowi6_mark = ireq->ir_mark;
fl6->fl6_dport = ireq->ir_rmt_port;
fl6->fl6_sport = htons(ireq->ir_num);
+ fl6->flowi6_uid = sk->sk_uid;
security_req_classify_flow(req, flowi6_to_flowi(fl6));
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
@@ -136,6 +138,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->flowi6_mark = sk->sk_mark;
fl6->fl6_sport = inet->inet_sport;
fl6->fl6_dport = inet->inet_dport;
+ fl6->flowi6_uid = sk->sk_uid;
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
rcu_read_lock();
@@ -173,7 +176,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
/* Restore final destination back after routing done */
fl6.daddr = sk->sk_v6_daddr;
- res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
np->tclass);
rcu_read_unlock();
return res;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b912f0dbaf72..8081bafe441b 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -29,7 +29,7 @@
#include <net/rawv6.h>
#include <net/transp_v6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified
in old IPv6 RFC. Well, it was reasonable value.
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d7d6d3ae0b3b..558631860d91 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -64,7 +64,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
#define IP6_GRE_HASH_SIZE_SHIFT 5
#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
-static int ip6gre_net_id __read_mostly;
+static unsigned int ip6gre_net_id __read_mostly;
struct ip6gre_net {
struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
@@ -548,6 +548,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
return -1;
@@ -580,6 +582,9 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
return -1;
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ ipv6h = ipv6_hdr(skb);
+
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -602,6 +607,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
return -1;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 89c59e656f44..fc7b4017ba24 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -191,6 +191,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive) {
__pskb_pull(skb, skb_gro_offset(skb));
+ skb_gro_frag0_invalidate(skb);
proto = ipv6_gso_pull_exthdrs(skb, proto);
skb_gro_pull(skb, -skb_transport_offset(skb));
skb_reset_transport_header(skb);
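The one-line addition pairs with the __pskb_pull() just above it: GRO caches a pointer into the first page fragment (frag0) for fast header access, and pulling those bytes into the linear area leaves that cached pointer stale. A hedged sketch of the rule, wrapped in a hypothetical helper:

    static void gro_pull_and_invalidate(struct sk_buff *skb, unsigned int len)
    {
            __pskb_pull(skb, len);          /* may consume the frag0 bytes */
            skb_gro_frag0_invalidate(skb);  /* so the cached pointer dies */
    }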
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 59eb4ed99ce8..b6a94ff0bbd0 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -39,6 +39,7 @@
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
@@ -131,6 +132,14 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
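The new hook gives cgroup-attached eBPF programs a veto over every locally generated IPv6 packet before fragmentation: a non-zero return from the macro (the program answering "drop") frees the skb and propagates the error. A minimal allow-all program, as a hedged sketch assuming a clang BPF target and libbpf-style loading (section name per BPF_CGROUP_INET_EGRESS):

    #include <linux/bpf.h>

    #define SEC(name) __attribute__((section(name), used))

    SEC("cgroup_skb/egress")
    int egress_allow(struct __sk_buff *skb)
    {
            return 1;   /* 1 = allow; 0 = drop (surfaces as -EPERM above) */
    }

    char _license[] SEC("license") = "GPL";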
@@ -163,7 +172,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
* which are using proper atomic operations or spinlocks.
*/
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
- struct ipv6_txoptions *opt, int tclass)
+ __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -203,7 +212,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if (opt->opt_nflen)
- ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
+ ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
+ &fl6->saddr);
}
skb_push(skb, sizeof(struct ipv6hdr));
@@ -230,7 +240,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = mark;
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
@@ -624,7 +634,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
- int first_len = skb_pagelen(skb);
+ unsigned int first_len = skb_pagelen(skb);
struct sk_buff *frag2;
if (first_len - hlen > mtu ||
@@ -1334,7 +1344,7 @@ emsgsize:
*/
if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
headersize == sizeof(struct ipv6hdr) &&
- length < mtu - headersize &&
+ length <= mtu - headersize &&
!(flags & MSG_MORE) &&
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
@@ -1363,7 +1373,7 @@ emsgsize:
*/
cork->length += length;
- if (((length > mtu) ||
+ if ((((length + fragheaderlen) > mtu) ||
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
@@ -1672,7 +1682,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
if (opt && opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if (opt && opt->opt_nflen)
- ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
+ ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index d76674efe523..75fac933c209 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -42,7 +42,7 @@
#include <linux/hash.h>
#include <linux/etherdevice.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <net/icmp.h>
@@ -83,7 +83,7 @@ static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
static struct rtnl_link_ops ip6_link_ops __read_mostly;
-static int ip6_tnl_net_id __read_mostly;
+static unsigned int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
/* the IPv6 tunnel fallback device */
struct net_device *fb_tnl_dev;
@@ -400,18 +400,19 @@ ip6_tnl_dev_uninit(struct net_device *dev)
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
- const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
- __u8 nexthdr = ipv6h->nexthdr;
- __u16 off = sizeof(*ipv6h);
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
+ unsigned int nhoff = raw - skb->data;
+ unsigned int off = nhoff + sizeof(*ipv6h);
+ u8 next, nexthdr = ipv6h->nexthdr;
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
- __u16 optlen = 0;
struct ipv6_opt_hdr *hdr;
- if (raw + off + sizeof(*hdr) > skb->data &&
- !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
+ u16 optlen;
+
+ if (!pskb_may_pull(skb, off + sizeof(*hdr)))
break;
- hdr = (struct ipv6_opt_hdr *) (raw + off);
+ hdr = (struct ipv6_opt_hdr *)(skb->data + off);
if (nexthdr == NEXTHDR_FRAGMENT) {
struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
if (frag_hdr->frag_off)
@@ -422,20 +423,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
} else {
optlen = ipv6_optlen(hdr);
}
+ /* cache hdr->nexthdr, since pskb_may_pull() might
+ * invalidate hdr
+ */
+ next = hdr->nexthdr;
if (nexthdr == NEXTHDR_DEST) {
- __u16 i = off + 2;
+ u16 i = 2;
+
+ /* Remember: hdr is no longer valid at this point. */
+ if (!pskb_may_pull(skb, off + optlen))
+ break;
+
while (1) {
struct ipv6_tlv_tnl_enc_lim *tel;
/* No more room for encapsulation limit */
- if (i + sizeof (*tel) > off + optlen)
+ if (i + sizeof(*tel) > optlen)
break;
- tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
+ tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
/* return index of option if found and valid */
if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
tel->length == 1)
- return i;
+ return i + off - nhoff;
/* else jump to next option */
if (tel->type)
i += tel->length + 2;
@@ -443,7 +453,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
i++;
}
}
- nexthdr = hdr->nexthdr;
+ nexthdr = next;
off += optlen;
}
return 0;
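The rewrite above restores a key invariant: every pskb_may_pull() may reallocate skb->head, so parse state must live as offsets from skb->data, pointers must be re-derived after each pull, and any field needed later (here hdr->nexthdr) must be cached before the next pull. A hedged sketch of the pattern, with the loop condition left as a placeholder:

    unsigned int off = nhoff + sizeof(struct ipv6hdr);
    struct ipv6_opt_hdr *hdr;
    u8 next;

    while (more_headers) {                  /* placeholder condition */
            if (!pskb_may_pull(skb, off + sizeof(*hdr)))
                    break;
            hdr  = (struct ipv6_opt_hdr *)(skb->data + off); /* re-derive */
            next = hdr->nexthdr;            /* cache before the next pull */
            off += ipv6_optlen(hdr);
            /* hdr must be treated as dead past any further pull */
    }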
@@ -1108,7 +1118,7 @@ route_lookup:
t->parms.name);
goto tx_err_dst_release;
}
- mtu = dst_mtu(dst) - psh_hlen;
+ mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen;
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
@@ -1117,7 +1127,7 @@ route_lookup:
mtu = IPV6_MIN_MTU;
if (skb_dst(skb) && !t->parms.collect_md)
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu && !skb_is_gso(skb)) {
+ if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
@@ -1166,7 +1176,7 @@ route_lookup:
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
- ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+ ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL);
}
/* Calculate max headroom for all the headers and adjust
@@ -1248,6 +1258,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_mark = skb->mark;
}
+ fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
@@ -1301,6 +1313,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowlabel = key->label;
} else {
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ ipv6h = ipv6_hdr(skb);
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
@@ -1326,6 +1340,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_mark = skb->mark;
}
+ fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
@@ -1645,7 +1661,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
struct ip6_tnl *tnl = netdev_priv(dev);
if (tnl->parms.proto == IPPROTO_IPIP) {
- if (new_mtu < 68)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
} else {
if (new_mtu < IPV6_MIN_MTU)
@@ -1798,6 +1814,8 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
dev->mtu = ETH_DATA_LEN - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = 0xFFF8 - dev->hard_header_len;
return 0;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c299c1e2bbf0..d82042c8d8fd 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -64,7 +64,7 @@ static int vti6_dev_init(struct net_device *dev);
static void vti6_dev_setup(struct net_device *dev);
static struct rtnl_link_ops vti6_link_ops __read_mostly;
-static int vti6_net_id __read_mostly;
+static unsigned int vti6_net_id __read_mostly;
struct vti6_net {
/* the vti6 tunnel fallback device */
struct net_device *fb_tnl_dev;
@@ -189,12 +189,12 @@ static int vti6_tnl_create2(struct net_device *dev)
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
int err;
+ dev->rtnl_link_ops = &vti6_link_ops;
err = register_netdevice(dev);
if (err < 0)
goto out;
strcpy(t->parms.name, dev->name);
- dev->rtnl_link_ops = &vti6_link_ops;
dev_hold(dev);
vti6_tnl_link(ip6n, t);
@@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
else
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
xfrm_state_put(x);
return 0;
@@ -812,30 +813,11 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return err;
}
-/**
- * vti6_tnl_change_mtu - change mtu manually for tunnel device
- * @dev: virtual device associated with tunnel
- * @new_mtu: the new mtu
- *
- * Return:
- * 0 on success,
- * %-EINVAL if mtu too small
- **/
-static int vti6_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < IPV6_MIN_MTU)
- return -EINVAL;
-
- dev->mtu = new_mtu;
- return 0;
-}
-
static const struct net_device_ops vti6_netdev_ops = {
.ndo_init = vti6_dev_init,
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
.ndo_do_ioctl = vti6_ioctl,
- .ndo_change_mtu = vti6_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -855,6 +837,8 @@ static void vti6_dev_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
dev->mtu = ETH_DATA_LEN;
+ dev->min_mtu = IPV6_MIN_MTU;
+ dev->max_mtu = IP_MAX_MTU;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
netif_keep_dst(dev);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7f4265b1649b..604d8953c775 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -16,7 +16,7 @@
*
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
@@ -636,7 +636,7 @@ static int pim6_rcv(struct sk_buff *skb)
goto drop;
pim = (struct pimreghdr *)skb_transport_header(skb);
- if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
+ if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
(pim->flags & PIM_NULL_REGISTER) ||
(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
sizeof(*pim), IPPROTO_PIM,
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 1b9316e1386a..54d165b9845a 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
else
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
xfrm_state_put(x);
return 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 636ec56f5f50..ee97c44e2aa0 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -52,8 +52,9 @@
#include <net/udplite.h>
#include <net/xfrm.h>
#include <net/compat.h>
+#include <net/seg6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
struct ip6_ra_chain *ip6_ra_chain;
DEFINE_RWLOCK(ip6_ra_lock);
@@ -430,6 +431,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
#endif
+ case IPV6_SRCRT_TYPE_4:
+ {
+ struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)
+ opt->srcrt;
+
+ if (!seg6_validate_srh(srh, optlen))
+ goto sticky_done;
+ break;
+ }
default:
goto sticky_done;
}
@@ -868,6 +878,10 @@ pref_skip_coa:
np->autoflowlabel = valbool;
retv = 0;
break;
+ case IPV6_RECVFRAGSIZE:
+ np->rxopt.bits.recvfragsize = valbool;
+ retv = 0;
+ break;
}
release_sock(sk);
@@ -1310,6 +1324,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->autoflowlabel;
break;
+ case IPV6_RECVFRAGSIZE:
+ val = np->rxopt.bits.recvfragsize;
+ break;
+
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 14a3903f1c82..7139fffd61b6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -81,7 +81,7 @@ static void mld_gq_timer_expire(unsigned long data);
static void mld_ifc_timer_expire(unsigned long data);
static void mld_ifc_event(struct inet6_dev *idev);
static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
-static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
+static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
static void mld_clear_delrec(struct inet6_dev *idev);
static bool mld_in_v1_mode(const struct inet6_dev *idev);
static int sf_setstate(struct ifmcaddr6 *pmc);
@@ -692,9 +692,9 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
dev_mc_del(dev, buf);
}
- if (mc->mca_flags & MAF_NOREPORT)
- goto done;
spin_unlock_bh(&mc->mca_lock);
+ if (mc->mca_flags & MAF_NOREPORT)
+ return;
if (!mc->idev->dead)
igmp6_leave_group(mc);
@@ -702,8 +702,6 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
spin_lock_bh(&mc->mca_lock);
if (del_timer(&mc->mca_timer))
atomic_dec(&mc->mca_refcnt);
-done:
- ip6_mc_clear_src(mc);
spin_unlock_bh(&mc->mca_lock);
}
@@ -748,10 +746,11 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
spin_unlock_bh(&idev->mc_lock);
}
-static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
+static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
{
struct ifmcaddr6 *pmc, *pmc_prev;
- struct ip6_sf_list *psf, *psf_next;
+ struct ip6_sf_list *psf;
+ struct in6_addr *pmca = &im->mca_addr;
spin_lock_bh(&idev->mc_lock);
pmc_prev = NULL;
@@ -768,14 +767,20 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
}
spin_unlock_bh(&idev->mc_lock);
+ spin_lock_bh(&im->mca_lock);
if (pmc) {
- for (psf = pmc->mca_tomb; psf; psf = psf_next) {
- psf_next = psf->sf_next;
- kfree(psf);
+ im->idev = pmc->idev;
+ im->mca_crcount = idev->mc_qrv;
+ im->mca_sfmode = pmc->mca_sfmode;
+ if (pmc->mca_sfmode == MCAST_INCLUDE) {
+ im->mca_tomb = pmc->mca_tomb;
+ im->mca_sources = pmc->mca_sources;
+ for (psf = im->mca_sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = im->mca_crcount;
}
in6_dev_put(pmc->idev);
- kfree(pmc);
}
+ spin_unlock_bh(&im->mca_lock);
}
static void mld_clear_delrec(struct inet6_dev *idev)
@@ -904,7 +909,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
mca_get(mc);
write_unlock_bh(&idev->lock);
- mld_del_delrec(idev, &mc->mca_addr);
+ mld_del_delrec(idev, mc);
igmp6_group_added(mc);
ma_put(mc);
return 0;
@@ -927,6 +932,7 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
write_unlock_bh(&idev->lock);
igmp6_group_dropped(ma);
+ ip6_mc_clear_src(ma);
ma_put(ma);
return 0;
@@ -2501,15 +2507,17 @@ void ipv6_mc_down(struct inet6_dev *idev)
/* Withdraw multicast list */
read_lock_bh(&idev->lock);
- mld_ifc_stop_timer(idev);
- mld_gq_stop_timer(idev);
- mld_dad_stop_timer(idev);
for (i = idev->mc_list; i; i = i->next)
igmp6_group_dropped(i);
- read_unlock_bh(&idev->lock);
- mld_clear_delrec(idev);
+ /* Stop the timers after the group drop, or we will
+ * start them again from mld_ifc_event()
+ */
+ mld_ifc_stop_timer(idev);
+ mld_gq_stop_timer(idev);
+ mld_dad_stop_timer(idev);
+ read_unlock_bh(&idev->lock);
}
static void ipv6_mc_reset(struct inet6_dev *idev)
@@ -2531,8 +2539,10 @@ void ipv6_mc_up(struct inet6_dev *idev)
read_lock_bh(&idev->lock);
ipv6_mc_reset(idev);
- for (i = idev->mc_list; i; i = i->next)
+ for (i = idev->mc_list; i; i = i->next) {
+ mld_del_delrec(idev, i);
igmp6_group_added(i);
+ }
read_unlock_bh(&idev->lock);
}
@@ -2565,6 +2575,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
/* Deactivate timers */
ipv6_mc_down(idev);
+ mld_clear_delrec(idev);
/* Delete all-nodes address. */
/* We cannot call ipv6_dev_mc_dec() directly, our caller in
@@ -2579,11 +2590,9 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
write_lock_bh(&idev->lock);
while ((i = idev->mc_list) != NULL) {
idev->mc_list = i->next;
- write_unlock_bh(&idev->lock);
- igmp6_group_dropped(i);
+ write_unlock_bh(&idev->lock);
ma_put(i);
-
write_lock_bh(&idev->lock);
}
write_unlock_bh(&idev->lock);
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 60c79a08e14a..64f0f7be9e5e 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -191,7 +191,7 @@ static inline int mip6_report_rl_allow(ktime_t stamp,
int allow = 0;
spin_lock_bh(&mip6_report_rl.lock);
- if (!ktime_equal(mip6_report_rl.stamp, stamp) ||
+ if (mip6_report_rl.stamp != stamp ||
mip6_report_rl.iif != iif ||
!ipv6_addr_equal(&mip6_report_rl.src, src) ||
!ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d8e671457d10..7ebac630d3c6 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -233,6 +233,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
case ND_OPT_SOURCE_LL_ADDR:
case ND_OPT_TARGET_LL_ADDR:
case ND_OPT_MTU:
+ case ND_OPT_NONCE:
case ND_OPT_REDIRECT_HDR:
if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
ND_PRINTK(2, warn,
@@ -568,7 +569,8 @@ static void ndisc_send_unsol_na(struct net_device *dev)
}
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr)
+ const struct in6_addr *daddr, const struct in6_addr *saddr,
+ u64 nonce)
{
struct sk_buff *skb;
struct in6_addr addr_buf;
@@ -588,6 +590,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_SOLICITATION);
+ if (nonce != 0)
+ optlen += 8;
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -605,6 +609,13 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
dev->dev_addr,
NDISC_NEIGHBOUR_SOLICITATION);
+ if (nonce != 0) {
+ u8 *opt = skb_put(skb, 8);
+
+ opt[0] = ND_OPT_NONCE;
+ opt[1] = 8 >> 3;
+ memcpy(opt + 2, &nonce, 6);
+ }
ndisc_send_skb(skb, daddr, saddr);
}
@@ -693,12 +704,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
- ndisc_send_ns(dev, target, target, saddr);
+ ndisc_send_ns(dev, target, target, saddr, 0);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(dev, target, &mcaddr, saddr);
+ ndisc_send_ns(dev, target, &mcaddr, saddr, 0);
}
}
@@ -742,6 +753,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
int dad = ipv6_addr_any(saddr);
bool inc;
int is_router = -1;
+ u64 nonce = 0;
if (skb->len < sizeof(struct nd_msg)) {
ND_PRINTK(2, warn, "NS: packet too short\n");
@@ -786,6 +798,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
return;
}
}
+ if (ndopts.nd_opts_nonce)
+ memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
inc = ipv6_addr_is_multicast(daddr);
@@ -794,6 +808,15 @@ static void ndisc_recv_ns(struct sk_buff *skb)
have_ifp:
if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
if (dad) {
+ if (nonce != 0 && ifp->dad_nonce == nonce) {
+ u8 *np = (u8 *)&nonce;
+ /* Matching nonce if looped back */
+ ND_PRINTK(2, notice,
+ "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
+ ifp->idev->dev->name,
+ &ifp->addr, np);
+ goto out;
+ }
/*
* We are colliding with another node
* who is doing DAD
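The nonce closes a long-standing DAD false positive: if the link loops our own neighbour solicitation back (hairpin switch, captive bridge), the old code read it as another node claiming the address. Now the probe carries 6 random bytes, and a received NS whose nonce matches the stored one is recognised as our own reflection. A hedged sketch of both halves — ifp->dad_nonce is assumed to be the copy kept by the companion addrconf change, which is outside this hunk:

    u64 nonce = 0;

    /* sender: 6 random bytes ride in an 8-byte ND_OPT_NONCE option */
    get_random_bytes(&nonce, 6);
    ndisc_send_ns(dev, &ifp->addr, &mcaddr, NULL, nonce);

    /* receiver: our own nonce coming back means loopback, not collision */
    if (dad && nonce != 0 && nonce == ifp->dad_nonce)
            return;         /* ignore the looped-back probe */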
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d11c46833d61..39970e212ad5 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
struct flowi6 fl6 = {
.flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
.flowi6_mark = skb->mark,
+ .flowi6_uid = sock_net_uid(net, skb->sk),
.daddr = iph->daddr,
.saddr = iph->saddr,
};
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index e10a04c9cdc7..6acb2eecd986 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6
To compile it as a module, choose M here. If unsure, say N.
+config NF_SOCKET_IPV6
+ tristate "IPv6 socket lookup support"
+ help
+ This option enables the IPv6 socket lookup infrastructure. This
+ is used by the ip6tables socket match.
+
if NF_TABLES
config NF_TABLES_IPV6
@@ -54,6 +60,14 @@ config NFT_DUP_IPV6
help
This module enables IPv6 packet duplication support for nf_tables.
+config NFT_FIB_IPV6
+ tristate "nf_tables fib / ipv6 route lookup support"
+ select NFT_FIB
+ help
+ This module enables IPv6 FIB lookups, e.g. for reverse path filtering.
+ It also allows querying the FIB for the route type, e.g. local, unicast,
+ multicast or blackhole.
+
endif # NF_TABLES_IPV6
endif # NF_TABLES
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b4f7d0b4e2af..fe180c96040e 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o
+
# logging
obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
@@ -40,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
+obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 55aacea24396..25a022d41a70 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -24,7 +24,7 @@
#include <linux/icmpv6.h>
#include <net/ipv6.h>
#include <net/compat.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
@@ -291,11 +291,7 @@ ip6t_do_table(struct sk_buff *skb,
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
acpar.hotdrop = false;
- acpar.net = state->net;
- acpar.in = state->in;
- acpar.out = state->out;
- acpar.family = NFPROTO_IPV6;
- acpar.hooknum = hook;
+ acpar.state = state;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
@@ -566,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
static int
find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
- unsigned int size)
+ unsigned int size,
+ struct xt_percpu_counter_alloc_state *alloc_state)
{
struct xt_entry_target *t;
struct xt_target *target;
@@ -574,12 +571,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
- unsigned long pcnt;
- pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(pcnt))
+ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
return -ENOMEM;
- e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
@@ -616,7 +610,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
cleanup_match(ematch, net);
}
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
return ret;
}
@@ -703,8 +697,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
-
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
}
/* Checks and translates the user-supplied table segment (held in
@@ -713,6 +706,7 @@ static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ip6t_replace *repl)
{
+ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
struct ip6t_entry *iter;
unsigned int *offsets;
unsigned int i;
@@ -772,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
- ret = find_check_entry(iter, net, repl->name, repl->size);
+ ret = find_check_entry(iter, net, repl->name, repl->size,
+ &alloc_state);
if (ret != 0)
break;
++i;
@@ -1007,7 +1002,7 @@ static int get_info(struct net *net, void __user *user,
#endif
t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
"ip6table_%s", name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
struct ip6t_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1037,7 +1032,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(AF_INET6);
@@ -1063,7 +1058,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1074,7 +1069,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
return ret;
}
@@ -1099,8 +1094,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
"ip6table_%s", name);
- if (IS_ERR_OR_NULL(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
+ if (!t) {
+ ret = -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1214,8 +1209,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
if (IS_ERR(paddc))
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET6, tmp.name);
- if (IS_ERR_OR_NULL(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
+ if (!t) {
+ ret = -ENOENT;
goto free;
}
@@ -1651,7 +1646,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (!IS_ERR_OR_NULL(t)) {
+ if (t) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
@@ -1665,7 +1660,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ ret = -ENOENT;
xt_compat_unlock(AF_INET6);
return ret;
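The counter hunks above switch ip6_tables to the batched per-cpu counter allocator: one allocation state is threaded through all entries of a table so the per-cpu blocks can be carved out in larger chunks instead of one xt_percpu_counter_alloc() round trip per rule. A hedged sketch of the caller-side shape:

    struct xt_percpu_counter_alloc_state state = { 0 };
    struct ip6t_entry *e;

    xt_entry_foreach(e, entry0, newinfo->size) {
            if (!xt_percpu_counter_alloc(&state, &e->counters))
                    return -ENOMEM;  /* caller unwinds already-built entries */
    }
    /* teardown: xt_percpu_counter_free(&e->counters) per entry */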
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 7f9f45d829d2..2b1a15846f9a 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -24,7 +24,7 @@
static unsigned int
masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
- return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out);
+ return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
}
static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index db29bbf41b59..fa51a205918d 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -39,35 +39,40 @@ static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_reject_info *reject = par->targinfo;
- struct net *net = par->net;
+ struct net *net = xt_net(par);
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
- nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par));
break;
case IP6T_ICMP6_ADM_PROHIBITED:
- nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED,
+ xt_hooknum(par));
break;
case IP6T_ICMP6_NOT_NEIGHBOUR:
- nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR,
+ xt_hooknum(par));
break;
case IP6T_ICMP6_ADDR_UNREACH:
- nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH,
+ xt_hooknum(par));
break;
case IP6T_ICMP6_PORT_UNREACH:
- nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH,
+ xt_hooknum(par));
break;
case IP6T_ICMP6_ECHOREPLY:
/* Do nothing */
break;
case IP6T_TCP_RESET:
- nf_send_reset6(net, skb, par->hooknum);
+ nf_send_reset6(net, skb, xt_hooknum(par));
break;
case IP6T_ICMP6_POLICY_FAIL:
- nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par));
break;
case IP6T_ICMP6_REJECT_ROUTE:
- nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE,
+ xt_hooknum(par));
break;
}
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 06bed74cf5ee..98c8dd38575a 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -277,12 +277,12 @@ static unsigned int
synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_synproxy_info *info = par->targinfo;
- struct net *net = par->net;
+ struct net *net = xt_net(par);
struct synproxy_net *snet = synproxy_pernet(net);
struct synproxy_options opts = {};
struct tcphdr *th, _th;
- if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
return NF_DROP;
th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
@@ -440,12 +440,12 @@ static int synproxy_tg6_check(const struct xt_tgchk_param *par)
e->ipv6.invflags & XT_INV_PROTO)
return -EINVAL;
- return nf_ct_l3proto_try_module_get(par->family);
+ return nf_ct_netns_get(par->net, par->family);
}
static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_target synproxy_tg6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 1ee1b25df096..b12e61b7b16c 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -72,10 +72,10 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
return ret;
}
-static bool rpfilter_is_local(const struct sk_buff *skb)
+static bool
+rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in)
{
- const struct rt6_info *rt = (const void *) skb_dst(skb);
- return rt && (rt->rt6i_flags & RTF_LOCAL);
+ return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}
static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
@@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct ipv6hdr *iph;
bool invert = info->flags & XT_RPFILTER_INVERT;
- if (rpfilter_is_local(skb))
+ if (rpfilter_is_loopback(skb, xt_in(par)))
return true ^ invert;
iph = ipv6_hdr(skb);
@@ -93,7 +93,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (unlikely(saddrtype == IPV6_ADDR_ANY))
return true ^ invert; /* not routable: forward path will drop it */
- return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert;
+ return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par),
+ info->flags) ^ invert;
}
static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 963ee3848675..4e3402486833 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -34,6 +34,13 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_log.h>
+static int conntrack6_net_id;
+static DEFINE_MUTEX(register_ipv6_hooks);
+
+struct conntrack6_net {
+ unsigned int users;
+};
+
static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
@@ -308,6 +315,42 @@ static int ipv6_nlattr_tuple_size(void)
}
#endif
+static int ipv6_hooks_register(struct net *net)
+{
+ struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
+ int err = 0;
+
+ mutex_lock(&register_ipv6_hooks);
+ cnet->users++;
+ if (cnet->users > 1)
+ goto out_unlock;
+
+ err = nf_defrag_ipv6_enable(net);
+ if (err < 0) {
+ cnet->users = 0;
+ goto out_unlock;
+ }
+
+ err = nf_register_net_hooks(net, ipv6_conntrack_ops,
+ ARRAY_SIZE(ipv6_conntrack_ops));
+ if (err)
+ cnet->users = 0;
+ out_unlock:
+ mutex_unlock(&register_ipv6_hooks);
+ return err;
+}
+
+static void ipv6_hooks_unregister(struct net *net)
+{
+ struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
+
+ mutex_lock(&register_ipv6_hooks);
+ if (cnet->users && (--cnet->users == 0))
+ nf_unregister_net_hooks(net, ipv6_conntrack_ops,
+ ARRAY_SIZE(ipv6_conntrack_ops));
+ mutex_unlock(&register_ipv6_hooks);
+}
+
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
.l3proto = PF_INET6,
.name = "ipv6",
@@ -321,6 +364,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
.nla_policy = ipv6_nla_policy,
#endif
+ .net_ns_get = ipv6_hooks_register,
+ .net_ns_put = ipv6_hooks_unregister,
.me = THIS_MODULE,
};
@@ -336,52 +381,51 @@ static struct nf_sockopt_ops so_getorigdst6 = {
.owner = THIS_MODULE,
};
+static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
+ &nf_conntrack_l4proto_tcp6,
+ &nf_conntrack_l4proto_udp6,
+ &nf_conntrack_l4proto_icmpv6,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ &nf_conntrack_l4proto_dccp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ &nf_conntrack_l4proto_sctp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ &nf_conntrack_l4proto_udplite6,
+#endif
+};
+
static int ipv6_net_init(struct net *net)
{
int ret = 0;
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6);
- if (ret < 0) {
- pr_err("nf_conntrack_tcp6: pernet registration failed\n");
- goto out;
- }
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6);
- if (ret < 0) {
- pr_err("nf_conntrack_udp6: pernet registration failed\n");
- goto cleanup_tcp6;
- }
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6);
- if (ret < 0) {
- pr_err("nf_conntrack_icmp6: pernet registration failed\n");
- goto cleanup_udp6;
- }
+ ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
+ if (ret < 0)
+ return ret;
+
ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
if (ret < 0) {
pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
- goto cleanup_icmpv6;
+ nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
}
- return 0;
- cleanup_icmpv6:
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
- cleanup_udp6:
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
- cleanup_tcp6:
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
- out:
return ret;
}
static void ipv6_net_exit(struct net *net)
{
nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
+ nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
}
static struct pernet_operations ipv6_net_ops = {
.init = ipv6_net_init,
.exit = ipv6_net_exit,
+ .id = &conntrack6_net_id,
+ .size = sizeof(struct conntrack6_net),
};
static int __init nf_conntrack_l3proto_ipv6_init(void)
@@ -389,7 +433,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
int ret = 0;
need_conntrack();
- nf_defrag_ipv6_enable();
ret = nf_register_sockopt(&so_getorigdst6);
if (ret < 0) {
@@ -401,47 +444,20 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
if (ret < 0)
goto cleanup_sockopt;
- ret = nf_register_hooks(ipv6_conntrack_ops,
- ARRAY_SIZE(ipv6_conntrack_ops));
- if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
- "hook.\n");
+ ret = nf_ct_l4proto_register(builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
+ if (ret < 0)
goto cleanup_pernet;
- }
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n");
- goto cleanup_hooks;
- }
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n");
- goto cleanup_tcp6;
- }
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n");
- goto cleanup_udp6;
- }
ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
if (ret < 0) {
pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
- goto cleanup_icmpv6;
+ goto cleanup_l4proto;
}
return ret;
-
- cleanup_icmpv6:
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- cleanup_udp6:
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- cleanup_tcp6:
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
- cleanup_hooks:
- nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
+cleanup_l4proto:
+ nf_ct_l4proto_unregister(builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
cleanup_pernet:
unregister_pernet_subsys(&ipv6_net_ops);
cleanup_sockopt:
@@ -453,10 +469,8 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
{
synchronize_net();
nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
+ nf_ct_l4proto_unregister(builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
unregister_pernet_subsys(&ipv6_net_ops);
nf_unregister_sockopt(&so_getorigdst6);
}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index f06b0471f39f..8e0bdd058787 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -30,6 +30,8 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+static DEFINE_MUTEX(defrag6_mutex);
+
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
@@ -87,6 +89,19 @@ static struct nf_hook_ops ipv6_defrag_ops[] = {
},
};
+static void __net_exit defrag6_net_exit(struct net *net)
+{
+ if (net->nf.defrag_ipv6) {
+ nf_unregister_net_hooks(net, ipv6_defrag_ops,
+ ARRAY_SIZE(ipv6_defrag_ops));
+ net->nf.defrag_ipv6 = false;
+ }
+}
+
+static struct pernet_operations defrag6_net_ops = {
+ .exit = defrag6_net_exit,
+};
+
static int __init nf_defrag_init(void)
{
int ret = 0;
@@ -96,9 +111,9 @@ static int __init nf_defrag_init(void)
pr_err("nf_defrag_ipv6: can't initialize frag6.\n");
return ret;
}
- ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
+ ret = register_pernet_subsys(&defrag6_net_ops);
if (ret < 0) {
- pr_err("nf_defrag_ipv6: can't register hooks\n");
+ pr_err("nf_defrag_ipv6: can't register pernet ops\n");
goto cleanup_frag6;
}
return ret;
@@ -111,12 +126,31 @@ cleanup_frag6:
static void __exit nf_defrag_fini(void)
{
- nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
+ unregister_pernet_subsys(&defrag6_net_ops);
nf_ct_frag6_cleanup();
}
-void nf_defrag_ipv6_enable(void)
+int nf_defrag_ipv6_enable(struct net *net)
{
+ int err = 0;
+
+ might_sleep();
+
+ if (net->nf.defrag_ipv6)
+ return 0;
+
+ mutex_lock(&defrag6_mutex);
+ if (net->nf.defrag_ipv6)
+ goto out_unlock;
+
+ err = nf_register_net_hooks(net, ipv6_defrag_ops,
+ ARRAY_SIZE(ipv6_defrag_ops));
+ if (err == 0)
+ net->nf.defrag_ipv6 = true;
+
+ out_unlock:
+ mutex_unlock(&defrag6_mutex);
+ return err;
}
EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
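The defrag hooks thus become per-netns and demand-registered: instead of a global nf_register_hooks() at module load, each user calls nf_defrag_ipv6_enable(net), which registers the hooks once per namespace under a mutex, and the pernet exit handler unregisters them when the namespace dies. A hedged sketch of a typical caller, a hypothetical match's checkentry:

    static int my_match_check(const struct xt_mtchk_param *par)
    {
            /* per-netns opt-in; returns 0 on success or if already enabled */
            return nf_defrag_ipv6_enable(par->net);
    }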
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 10090400c72f..eedee5d108d9 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -157,6 +157,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.fl6_sport = otcph->dest;
fl6.fl6_dport = otcph->source;
fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev);
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark);
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
@@ -180,6 +181,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
skb_dst_set(nskb, dst);
+ nskb->mark = fl6.flowi6_mark;
+
skb_reserve(nskb, hh_len + dst->header_len);
ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
ip6_dst_hoplimit(dst));
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
new file mode 100644
index 000000000000..ebb2bf84232a
--- /dev/null
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/inet6_hashtables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_socket.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static int
+extract_icmp6_fields(const struct sk_buff *skb,
+ unsigned int outside_hdrlen,
+ int *protocol,
+ const struct in6_addr **raddr,
+ const struct in6_addr **laddr,
+ __be16 *rport,
+ __be16 *lport,
+ struct ipv6hdr *ipv6_var)
+{
+ const struct ipv6hdr *inside_iph;
+ struct icmp6hdr *icmph, _icmph;
+ __be16 *ports, _ports[2];
+ u8 inside_nexthdr;
+ __be16 inside_fragoff;
+ int inside_hdrlen;
+
+ icmph = skb_header_pointer(skb, outside_hdrlen,
+ sizeof(_icmph), &_icmph);
+ if (icmph == NULL)
+ return 1;
+
+ if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
+ return 1;
+
+ inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
+ sizeof(*ipv6_var), ipv6_var);
+ if (inside_iph == NULL)
+ return 1;
+ inside_nexthdr = inside_iph->nexthdr;
+
+ inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
+ sizeof(*ipv6_var),
+ &inside_nexthdr, &inside_fragoff);
+ if (inside_hdrlen < 0)
+ return 1; /* hjm: Packet has no/incomplete transport layer headers. */
+
+ if (inside_nexthdr != IPPROTO_TCP &&
+ inside_nexthdr != IPPROTO_UDP)
+ return 1;
+
+ ports = skb_header_pointer(skb, inside_hdrlen,
+ sizeof(_ports), &_ports);
+ if (ports == NULL)
+ return 1;
+
+	/* the inner IP packet is the one we sent and that was quoted back
+	 * in the ICMPv6 error, so its saddr is the local address */
+ *protocol = inside_nexthdr;
+ *laddr = &inside_iph->saddr;
+ *lport = ports[0];
+ *raddr = &inside_iph->daddr;
+ *rport = ports[1];
+
+ return 0;
+}
+
+static struct sock *
+nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+ const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ return inet6_lookup(net, &tcp_hashinfo, skb, doff,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ case IPPROTO_UDP:
+ return udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ }
+
+ return NULL;
+}
+
+struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
+ const struct net_device *indev)
+{
+ __be16 uninitialized_var(dport), uninitialized_var(sport);
+ const struct in6_addr *daddr = NULL, *saddr = NULL;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
+ int thoff = 0, tproto;
+
+ tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+ if (tproto < 0) {
+ pr_debug("unable to find transport header in IPv6 packet, dropping\n");
+ return NULL;
+ }
+
+ if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
+ struct udphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
+ saddr = &iph->saddr;
+ sport = hp->source;
+ daddr = &iph->daddr;
+ dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = tproto == IPPROTO_TCP ?
+ thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+ thoff + sizeof(*hp);
+
+ } else if (tproto == IPPROTO_ICMPV6) {
+ struct ipv6hdr ipv6_var;
+
+ if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
+ &sport, &dport, &ipv6_var))
+ return NULL;
+ } else {
+ return NULL;
+ }
+
+ return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
+ sport, dport, indev);
+}
+EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure");
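A rough, self-contained illustration of the offset arithmetic that extract_icmp6_fields() performs above; the header sizes here are assumptions of this sketch (40-byte IPv6 header, 8-byte ICMPv6 header), not values derived from the kernel structs:

#include <stdio.h>

int main(void)
{
	unsigned int outside_hdrlen = 40;	/* outer IPv6 header, assumed */
	unsigned int icmp6_hdrlen = 8;		/* ICMPv6 header, assumed */
	unsigned int inner_ipv6_len = 40;	/* quoted inner IPv6 header */

	/* the quoted packet starts right after the ICMPv6 header ... */
	unsigned int inner_iph_off = outside_hdrlen + icmp6_hdrlen;
	/* ... and, with no extension headers, the ports right after it */
	unsigned int ports_off = inner_iph_off + inner_ipv6_len;

	printf("inner IPv6 header at %u, ports at %u\n",
	       inner_iph_off, ports_off);	/* 48 and 88 */
	return 0;
}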
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 831f86e1ec08..d8b5b60b7d53 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
- nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
+ nf_dup_ipv6(nft_net(pkt), pkt->skb, nft_hook(pkt), gw, oif);
}
static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
new file mode 100644
index 000000000000..765facf03d45
--- /dev/null
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -0,0 +1,270 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+static int get_ifindex(const struct net_device *dev)
+{
+ return dev ? dev->ifindex : 0;
+}
+
+static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
+ const struct nft_pktinfo *pkt,
+ const struct net_device *dev)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
+ int lookup_flags = 0;
+
+ if (priv->flags & NFTA_FIB_F_DADDR) {
+ fl6->daddr = iph->daddr;
+ fl6->saddr = iph->saddr;
+ } else {
+ fl6->daddr = iph->saddr;
+ fl6->saddr = iph->daddr;
+ }
+
+ if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+ }
+
+ if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
+ lookup_flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ if (priv->flags & NFTA_FIB_F_MARK)
+ fl6->flowi6_mark = pkt->skb->mark;
+
+ fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
+
+ return lookup_flags;
+}
+
+static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
+ const struct nft_pktinfo *pkt)
+{
+ const struct net_device *dev = NULL;
+ const struct nf_ipv6_ops *v6ops;
+ const struct nf_afinfo *afinfo;
+ int route_err, addrtype;
+ struct rt6_info *rt;
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_proto = pkt->tprot,
+ };
+ u32 ret = 0;
+
+ afinfo = nf_get_afinfo(NFPROTO_IPV6);
+ if (!afinfo)
+ return RTN_UNREACHABLE;
+
+ if (priv->flags & NFTA_FIB_F_IIF)
+ dev = nft_in(pkt);
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ dev = nft_out(pkt);
+
+ nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+
+ v6ops = nf_get_ipv6_ops();
+ if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+ ret = RTN_LOCAL;
+
+ route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
+ flowi6_to_flowi(&fl6), false);
+ if (route_err)
+ goto err;
+
+ if (rt->rt6i_flags & RTF_REJECT) {
+ route_err = rt->dst.error;
+ dst_release(&rt->dst);
+ goto err;
+ }
+
+ if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr))
+ ret = RTN_ANYCAST;
+ else if (!dev && rt->rt6i_flags & RTF_LOCAL)
+ ret = RTN_LOCAL;
+
+ dst_release(&rt->dst);
+
+ if (ret)
+ return ret;
+
+ addrtype = ipv6_addr_type(&fl6.daddr);
+
+ if (addrtype & IPV6_ADDR_MULTICAST)
+ return RTN_MULTICAST;
+ if (addrtype & IPV6_ADDR_UNICAST)
+ return RTN_UNICAST;
+
+ return RTN_UNSPEC;
+ err:
+ switch (route_err) {
+ case -EINVAL:
+ return RTN_BLACKHOLE;
+ case -EACCES:
+ return RTN_PROHIBIT;
+ case -EAGAIN:
+ return RTN_THROW;
+ default:
+ break;
+ }
+
+ return RTN_UNREACHABLE;
+}
+
+void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+
+ *dest = __nft_fib6_eval_type(priv, pkt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
+
+void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ const struct net_device *oif = NULL;
+ u32 *dest = &regs->data[priv->dreg];
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_proto = pkt->tprot,
+ };
+ struct rt6_info *rt;
+ int lookup_flags;
+
+ if (priv->flags & NFTA_FIB_F_IIF)
+ oif = nft_in(pkt);
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ oif = nft_out(pkt);
+
+ lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+
+ if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+ nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+ nft_fib_store_result(dest, priv->result, pkt,
+ nft_in(pkt)->ifindex);
+ return;
+ }
+
+ *dest = 0;
+ again:
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ if (rt->dst.error)
+ goto put_rt_err;
+
+ /* Should not see RTF_LOCAL here */
+ if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
+ goto put_rt_err;
+
+ if (oif && oif != rt->rt6i_idev->dev) {
+ /* multipath route? Try again with F_IFACE */
+ if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ fl6.flowi6_oif = oif->ifindex;
+ ip6_rt_put(rt);
+ goto again;
+ }
+ }
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ *dest = rt->rt6i_idev->dev->ifindex;
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ put_rt_err:
+ ip6_rt_put(rt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval);
+
+static struct nft_expr_type nft_fib6_type;
+
+static const struct nft_expr_ops nft_fib6_type_ops = {
+ .type = &nft_fib6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib6_eval_type,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static const struct nft_expr_ops nft_fib6_ops = {
+ .type = &nft_fib6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib6_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static const struct nft_expr_ops *
+nft_fib6_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ enum nft_fib_result result;
+
+ if (!tb[NFTA_FIB_RESULT])
+ return ERR_PTR(-EINVAL);
+
+ result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+ switch (result) {
+ case NFT_FIB_RESULT_OIF:
+ return &nft_fib6_ops;
+ case NFT_FIB_RESULT_OIFNAME:
+ return &nft_fib6_ops;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return &nft_fib6_type_ops;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static struct nft_expr_type nft_fib6_type __read_mostly = {
+ .name = "fib",
+ .select_ops = &nft_fib6_select_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib6_module_init(void)
+{
+ return nft_register_expr(&nft_fib6_type);
+}
+
+static void __exit nft_fib6_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib6_type);
+}
+module_init(nft_fib6_module_init);
+module_exit(nft_fib6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
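The error-to-type mapping at the end of __nft_fib6_eval_type() reflects how IPv6 encodes blackhole, prohibit and throw routes as dst errors; the same mapping in isolation, with plain enum values standing in for the kernel's RTN_* constants:

#include <errno.h>

enum rtn_type { RTN_UNREACH, RTN_BLACKHOLE, RTN_PROHIBIT, RTN_THROW };

static enum rtn_type route_err_to_rtn(int route_err)
{
	switch (route_err) {
	case -EINVAL:	/* blackhole route */
		return RTN_BLACKHOLE;
	case -EACCES:	/* prohibit route */
		return RTN_PROHIBIT;
	case -EAGAIN:	/* throw route */
		return RTN_THROW;
	default:
		return RTN_UNREACH;
	}
}

int main(void)
{
	return route_err_to_rtn(-EACCES) == RTN_PROHIBIT ? 0 : 1;
}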
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 9597ffb74077..6c5b5b1830a7 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,14 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
range.max_proto.all =
*(__be16 *)&regs->data[priv->sreg_proto_max];
}
- regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
+ regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
+ nft_out(pkt));
+}
+
+static void
+nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
}
static struct nft_expr_type nft_masq_ipv6_type;
@@ -41,6 +48,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
.eval = nft_masq_ipv6_eval,
.init = nft_masq_init,
+ .destroy = nft_masq_ipv6_destroy,
.dump = nft_masq_dump,
.validate = nft_masq_validate,
};
@@ -77,5 +85,5 @@ module_init(nft_masq_ipv6_module_init);
module_exit(nft_masq_ipv6_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index aca44e89a881..f5ac080fc084 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -35,7 +35,14 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
range.flags |= priv->flags;
- regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook);
+ regs->verdict.code =
+ nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
+}
+
+static void
+nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
}
static struct nft_expr_type nft_redir_ipv6_type;
@@ -44,6 +51,7 @@ static const struct nft_expr_ops nft_redir_ipv6_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
.eval = nft_redir_ipv6_eval,
.init = nft_redir_init,
+ .destroy = nft_redir_ipv6_destroy,
.dump = nft_redir_dump,
.validate = nft_redir_validate,
};
@@ -71,5 +79,5 @@ module_init(nft_redir_ipv6_module_init);
module_exit(nft_redir_ipv6_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 92bda9908bb9..057deeaff1cb 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -27,11 +27,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
- pkt->hook);
+ nf_send_unreach6(nft_net(pkt), pkt->skb, priv->icmp_code,
+ nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
+ nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt));
break;
default:
break;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 66e2d9dfc43a..e1f8b34d7a2e 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.daddr = *daddr;
fl6.flowi6_oif = oif;
fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_uid = sk->sk_uid;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 054a1d84fc5e..ea89073c8247 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -65,11 +65,12 @@
#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
-static struct raw_hashinfo raw_v6_hashinfo = {
+struct raw_hashinfo raw_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
};
+EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
-static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
unsigned short num, const struct in6_addr *loc_addr,
const struct in6_addr *rmt_addr, int dif)
{
@@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
found:
return sk;
}
+EXPORT_SYMBOL_GPL(__raw_v6_lookup);
/*
* 0 - deliver
@@ -589,7 +591,11 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
}
offset += skb_transport_offset(skb);
- BUG_ON(skb_copy_bits(skb, offset, &csum, 2));
+ err = skb_copy_bits(skb, offset, &csum, 2);
+ if (err < 0) {
+ ip6_flush_pending_frames(sk);
+ goto out;
+ }
/* in case cksum was not initialized */
if (unlikely(csum))
@@ -774,6 +780,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_uid = sk->sk_uid;
ipc6.hlimit = -1;
ipc6.tclass = -1;
@@ -1259,6 +1266,7 @@ struct proto rawv6_prot = {
.compat_getsockopt = compat_rawv6_getsockopt,
.compat_ioctl = compat_rawv6_ioctl,
#endif
+ .diag_destroy = raw_abort,
};
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3815e8505ed2..e1da5b888cc4 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
{
struct sk_buff *prev, *next;
struct net_device *dev;
- int offset, end;
+ int offset, end, fragsize;
struct net *net = dev_net(skb_dst(skb)->dev);
u8 ecn;
@@ -336,6 +336,10 @@ found:
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.net, skb->truesize);
+ fragsize = -skb_network_offset(skb) + skb->len;
+ if (fragsize > fq->q.max_size)
+ fq->q.max_size = fragsize;
+
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
@@ -495,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(head)->frag_max_size = fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
skb_postpush_rcsum(head, skb_network_header(head),
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1b57e11e6e0d..7ea85370c11c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -64,7 +64,7 @@
#include <net/l3mdev.h>
#include <trace/events/fib6.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@@ -527,7 +527,7 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
+ ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
dev_put(work->dev);
kfree(work);
}
@@ -1408,7 +1408,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
}
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
- int oif, u32 mark)
+ int oif, u32 mark, kuid_t uid)
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
@@ -1420,6 +1420,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
+ fl6.flowi6_uid = uid;
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
@@ -1433,7 +1434,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
struct dst_entry *dst;
ip6_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark);
+ sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -1463,7 +1464,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_node *fn;
/* Get the "current" route for this destination and
- * check if the redirect has come from approriate router.
+ * check if the redirect has come from appropriate router.
*
* RFC 4861 specifies that redirects should only be
* accepted if they come from the nexthop to the target.
@@ -1525,7 +1526,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
flags, __ip6_route_redirect);
}
-void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
+ kuid_t uid)
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
@@ -1538,6 +1540,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
+ fl6.flowi6_uid = uid;
dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
@@ -1559,6 +1562,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
fl6.flowi6_mark = mark;
fl6.daddr = msg->dest;
fl6.saddr = iph->daddr;
+ fl6.flowi6_uid = sock_net_uid(net, NULL);
dst = ip6_route_redirect(net, &fl6, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
@@ -1567,7 +1571,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
- ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
+ sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
@@ -1995,8 +2000,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
It is very good, but in some (rare!) circumstances
(SIT, PtP, NBMA NOARP links) it is handy to allow
some exceptions. --ANK
+ We allow IPv4-mapped nexthops to support RFC4798-type
+ addressing
*/
- if (!(gwa_type & IPV6_ADDR_UNICAST))
+ if (!(gwa_type & (IPV6_ADDR_UNICAST |
+ IPV6_ADDR_MAPPED)))
goto out;
if (cfg->fc_table) {
@@ -2166,6 +2174,8 @@ static int ip6_route_del(struct fib6_config *cfg)
continue;
if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
continue;
+ if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
+ continue;
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
@@ -2758,7 +2768,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
old MTU is the lowest MTU in the path, update the route PMTU
to reflect the increase. In this case if the other nodes' MTU
also have the lowest MTU, TOO BIG MESSAGE will be lead to
- PMTU discouvery.
+ PMTU discovery.
*/
if (rt->dst.dev == arg->dev &&
dst_metric_raw(&rt->dst, RTAX_MTU) &&
@@ -2801,6 +2811,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_ENCAP_TYPE] = { .type = NLA_U16 },
[RTA_ENCAP] = { .type = NLA_NESTED },
[RTA_EXPIRES] = { .type = NLA_U32 },
+ [RTA_UID] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2885,6 +2896,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_MULTIPATH]) {
cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+
+ err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
+ cfg->fc_mp_len);
+ if (err < 0)
+ goto errout;
}
if (tb[RTA_PREF]) {
@@ -2898,9 +2914,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_ENCAP])
cfg->fc_encap = tb[RTA_ENCAP];
- if (tb[RTA_ENCAP_TYPE])
+ if (tb[RTA_ENCAP_TYPE]) {
cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
+ err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+ if (err < 0)
+ goto errout;
+ }
+
if (tb[RTA_EXPIRES]) {
unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
@@ -3306,7 +3327,8 @@ static int rt6_fill_node(struct net *net,
if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
goto nla_put_failure;
- lwtunnel_fill_encap(skb, rt->dst.lwtstate);
+ if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -3375,6 +3397,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
if (tb[RTA_MARK])
fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
+ if (tb[RTA_UID])
+ fl6.flowi6_uid = make_kuid(current_user_ns(),
+ nla_get_u32(tb[RTA_UID]));
+ else
+ fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
+
if (iif) {
struct net_device *dev;
int flags = 0;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
new file mode 100644
index 000000000000..a855eb325b03
--- /dev/null
+++ b/net/ipv6/seg6.c
@@ -0,0 +1,497 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/slab.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+
+#include <net/seg6.h>
+#include <net/genetlink.h>
+#include <linux/seg6.h>
+#include <linux/seg6_genl.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+
+bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
+{
+ int trailing;
+ unsigned int tlv_offset;
+
+ if (srh->type != IPV6_SRCRT_TYPE_4)
+ return false;
+
+ if (((srh->hdrlen + 1) << 3) != len)
+ return false;
+
+ if (srh->segments_left != srh->first_segment)
+ return false;
+
+ tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
+
+ trailing = len - tlv_offset;
+ if (trailing < 0)
+ return false;
+
+ while (trailing) {
+ struct sr6_tlv *tlv;
+ unsigned int tlv_len;
+
+ tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset);
+ tlv_len = sizeof(*tlv) + tlv->len;
+
+ trailing -= tlv_len;
+ if (trailing < 0)
+ return false;
+
+ tlv_offset += tlv_len;
+ }
+
+ return true;
+}
+
+static struct genl_family seg6_genl_family;
+
+static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = {
+ [SEG6_ATTR_DST] = { .type = NLA_BINARY,
+ .len = sizeof(struct in6_addr) },
+ [SEG6_ATTR_DSTLEN] = { .type = NLA_S32, },
+ [SEG6_ATTR_HMACKEYID] = { .type = NLA_U32, },
+ [SEG6_ATTR_SECRET] = { .type = NLA_BINARY, },
+ [SEG6_ATTR_SECRETLEN] = { .type = NLA_U8, },
+ [SEG6_ATTR_ALGID] = { .type = NLA_U8, },
+ [SEG6_ATTR_HMACINFO] = { .type = NLA_NESTED, },
+};
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+
+static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct seg6_pernet_data *sdata;
+ struct seg6_hmac_info *hinfo;
+ u32 hmackeyid;
+ char *secret;
+ int err = 0;
+ u8 algid;
+ u8 slen;
+
+ sdata = seg6_pernet(net);
+
+ if (!info->attrs[SEG6_ATTR_HMACKEYID] ||
+ !info->attrs[SEG6_ATTR_SECRETLEN] ||
+ !info->attrs[SEG6_ATTR_ALGID])
+ return -EINVAL;
+
+ hmackeyid = nla_get_u32(info->attrs[SEG6_ATTR_HMACKEYID]);
+ slen = nla_get_u8(info->attrs[SEG6_ATTR_SECRETLEN]);
+ algid = nla_get_u8(info->attrs[SEG6_ATTR_ALGID]);
+
+ if (hmackeyid == 0)
+ return -EINVAL;
+
+ if (slen > SEG6_HMAC_SECRET_LEN)
+ return -EINVAL;
+
+ mutex_lock(&sdata->lock);
+ hinfo = seg6_hmac_info_lookup(net, hmackeyid);
+
+ if (!slen) {
+		err = -ENOENT;
+		if (hinfo)
+			err = seg6_hmac_info_del(net, hmackeyid);
+
+ goto out_unlock;
+ }
+
+ if (!info->attrs[SEG6_ATTR_SECRET]) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (hinfo) {
+ err = seg6_hmac_info_del(net, hmackeyid);
+ if (err)
+ goto out_unlock;
+ }
+
+ secret = (char *)nla_data(info->attrs[SEG6_ATTR_SECRET]);
+
+ hinfo = kzalloc(sizeof(*hinfo), GFP_KERNEL);
+ if (!hinfo) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ memcpy(hinfo->secret, secret, slen);
+ hinfo->slen = slen;
+ hinfo->alg_id = algid;
+ hinfo->hmackeyid = hmackeyid;
+
+ err = seg6_hmac_info_add(net, hmackeyid, hinfo);
+ if (err)
+ kfree(hinfo);
+
+out_unlock:
+ mutex_unlock(&sdata->lock);
+ return err;
+}
+
+#else
+
+static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
+{
+ return -ENOTSUPP;
+}
+
+#endif
+
+static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct in6_addr *val, *t_old, *t_new;
+ struct seg6_pernet_data *sdata;
+
+ sdata = seg6_pernet(net);
+
+ if (!info->attrs[SEG6_ATTR_DST])
+ return -EINVAL;
+
+ val = nla_data(info->attrs[SEG6_ATTR_DST]);
+ t_new = kmemdup(val, sizeof(*val), GFP_KERNEL);
+ if (!t_new)
+ return -ENOMEM;
+
+ mutex_lock(&sdata->lock);
+
+ t_old = sdata->tun_src;
+ rcu_assign_pointer(sdata->tun_src, t_new);
+
+ mutex_unlock(&sdata->lock);
+
+ synchronize_net();
+ kfree(t_old);
+
+ return 0;
+}
+
+static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct in6_addr *tun_src;
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC);
+ if (!hdr)
+ goto free_msg;
+
+ rcu_read_lock();
+ tun_src = rcu_dereference(seg6_pernet(net)->tun_src);
+
+ if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src))
+ goto nla_put_failure;
+
+ rcu_read_unlock();
+
+ genlmsg_end(msg, hdr);
+ genlmsg_reply(msg, info);
+
+ return 0;
+
+nla_put_failure:
+ rcu_read_unlock();
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return -ENOMEM;
+}
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+
+static int __seg6_hmac_fill_info(struct seg6_hmac_info *hinfo,
+ struct sk_buff *msg)
+{
+ if (nla_put_u32(msg, SEG6_ATTR_HMACKEYID, hinfo->hmackeyid) ||
+ nla_put_u8(msg, SEG6_ATTR_SECRETLEN, hinfo->slen) ||
+ nla_put(msg, SEG6_ATTR_SECRET, hinfo->slen, hinfo->secret) ||
+ nla_put_u8(msg, SEG6_ATTR_ALGID, hinfo->alg_id))
+ return -1;
+
+ return 0;
+}
+
+static int __seg6_genl_dumphmac_element(struct seg6_hmac_info *hinfo,
+ u32 portid, u32 seq, u32 flags,
+ struct sk_buff *skb, u8 cmd)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &seg6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (__seg6_hmac_fill_info(hinfo, skb) < 0)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int seg6_genl_dumphmac_start(struct netlink_callback *cb)
+{
+ struct net *net = sock_net(cb->skb->sk);
+ struct seg6_pernet_data *sdata;
+ struct rhashtable_iter *iter;
+
+ sdata = seg6_pernet(net);
+ iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&sdata->hmac_infos, iter);
+
+ return 0;
+}
+
+static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+
+ kfree(iter);
+
+ return 0;
+}
+
+static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ struct seg6_pernet_data *sdata;
+ struct seg6_hmac_info *hinfo;
+ int ret;
+
+ sdata = seg6_pernet(net);
+
+ ret = rhashtable_walk_start(iter);
+ if (ret && ret != -EAGAIN)
+ goto done;
+
+ for (;;) {
+ hinfo = rhashtable_walk_next(iter);
+
+ if (IS_ERR(hinfo)) {
+ if (PTR_ERR(hinfo) == -EAGAIN)
+ continue;
+ ret = PTR_ERR(hinfo);
+ goto done;
+ } else if (!hinfo) {
+ break;
+ }
+
+ ret = __seg6_genl_dumphmac_element(hinfo,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb, SEG6_CMD_DUMPHMAC);
+ if (ret)
+ goto done;
+ }
+
+ ret = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return ret;
+}
+
+#else
+
+static int seg6_genl_dumphmac_start(struct netlink_callback *cb)
+{
+ return 0;
+}
+
+static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
+{
+ return 0;
+}
+
+static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return -ENOTSUPP;
+}
+
+#endif
+
+static int __net_init seg6_net_init(struct net *net)
+{
+ struct seg6_pernet_data *sdata;
+
+ sdata = kzalloc(sizeof(*sdata), GFP_KERNEL);
+ if (!sdata)
+ return -ENOMEM;
+
+ mutex_init(&sdata->lock);
+
+ sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL);
+ if (!sdata->tun_src) {
+ kfree(sdata);
+ return -ENOMEM;
+ }
+
+ net->ipv6.seg6_data = sdata;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ seg6_hmac_net_init(net);
+#endif
+
+ return 0;
+}
+
+static void __net_exit seg6_net_exit(struct net *net)
+{
+ struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ seg6_hmac_net_exit(net);
+#endif
+
+ kfree(sdata->tun_src);
+ kfree(sdata);
+}
+
+static struct pernet_operations ip6_segments_ops = {
+ .init = seg6_net_init,
+ .exit = seg6_net_exit,
+};
+
+static const struct genl_ops seg6_genl_ops[] = {
+ {
+ .cmd = SEG6_CMD_SETHMAC,
+ .doit = seg6_genl_sethmac,
+ .policy = seg6_genl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = SEG6_CMD_DUMPHMAC,
+ .start = seg6_genl_dumphmac_start,
+ .dumpit = seg6_genl_dumphmac,
+ .done = seg6_genl_dumphmac_done,
+ .policy = seg6_genl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = SEG6_CMD_SET_TUNSRC,
+ .doit = seg6_genl_set_tunsrc,
+ .policy = seg6_genl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = SEG6_CMD_GET_TUNSRC,
+ .doit = seg6_genl_get_tunsrc,
+ .policy = seg6_genl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+static struct genl_family seg6_genl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = SEG6_GENL_NAME,
+ .version = SEG6_GENL_VERSION,
+ .maxattr = SEG6_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = seg6_genl_ops,
+ .n_ops = ARRAY_SIZE(seg6_genl_ops),
+ .module = THIS_MODULE,
+};
+
+int __init seg6_init(void)
+{
+ int err = -ENOMEM;
+
+ err = genl_register_family(&seg6_genl_family);
+ if (err)
+ goto out;
+
+ err = register_pernet_subsys(&ip6_segments_ops);
+ if (err)
+ goto out_unregister_genl;
+
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ err = seg6_iptunnel_init();
+ if (err)
+ goto out_unregister_pernet;
+#endif
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ err = seg6_hmac_init();
+ if (err)
+ goto out_unregister_iptun;
+#endif
+
+ pr_info("Segment Routing with IPv6\n");
+
+out:
+ return err;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+out_unregister_iptun:
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ seg6_iptunnel_exit();
+#endif
+#endif
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+out_unregister_pernet:
+ unregister_pernet_subsys(&ip6_segments_ops);
+#endif
+out_unregister_genl:
+ genl_unregister_family(&seg6_genl_family);
+ goto out;
+}
+
+void seg6_exit(void)
+{
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ seg6_hmac_exit();
+#endif
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ seg6_iptunnel_exit();
+#endif
+ unregister_pernet_subsys(&ip6_segments_ops);
+ genl_unregister_family(&seg6_genl_family);
+}
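To make the length arithmetic in seg6_validate_srh() concrete: hdrlen counts 8-byte units beyond the first 8 bytes, and each segment is a 16-byte IPv6 address, hence the << 3 and << 4 shifts. A worked example with two segments and no TLVs (the 8-byte fixed header size is an assumption of this sketch):

#include <assert.h>

int main(void)
{
	unsigned int first_segment = 1;		/* two segments, indices 0..1 */
	unsigned int hdrlen = 4;		/* 40-byte SRH: (4 + 1) << 3 */
	unsigned int len = (hdrlen + 1) << 3;
	unsigned int tlv_offset = 8 + ((first_segment + 1) << 4);

	assert(len == 40);
	assert(tlv_offset == 40);	/* segments end exactly at len ... */
	assert(len - tlv_offset == 0);	/* ... so no trailing TLV bytes */
	return 0;
}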
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
new file mode 100644
index 000000000000..03a064803626
--- /dev/null
+++ b/net/ipv6/seg6_hmac.c
@@ -0,0 +1,484 @@
+/*
+ * SR-IPv6 implementation -- HMAC functions
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/mroute6.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+
+#include <linux/cryptohash.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <net/seg6.h>
+#include <net/genetlink.h>
+#include <net/seg6_hmac.h>
+#include <linux/random.h>
+
+static char * __percpu *hmac_ring;
+
+static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct seg6_hmac_info *hinfo = obj;
+
+ return (hinfo->hmackeyid != *(__u32 *)arg->key);
+}
+
+static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo)
+{
+ kfree_rcu(hinfo, rcu);
+}
+
+static void seg6_free_hi(void *ptr, void *arg)
+{
+ struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr;
+
+ if (hinfo)
+ seg6_hinfo_release(hinfo);
+}
+
+static const struct rhashtable_params rht_params = {
+ .head_offset = offsetof(struct seg6_hmac_info, node),
+ .key_offset = offsetof(struct seg6_hmac_info, hmackeyid),
+ .key_len = sizeof(u32),
+ .automatic_shrinking = true,
+ .obj_cmpfn = seg6_hmac_cmpfn,
+};
+
+static struct seg6_hmac_algo hmac_algos[] = {
+ {
+ .alg_id = SEG6_HMAC_ALGO_SHA1,
+ .name = "hmac(sha1)",
+ },
+ {
+ .alg_id = SEG6_HMAC_ALGO_SHA256,
+ .name = "hmac(sha256)",
+ },
+};
+
+static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh)
+{
+ struct sr6_tlv_hmac *tlv;
+
+ if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5)
+ return NULL;
+
+ if (!sr_has_hmac(srh))
+ return NULL;
+
+ tlv = (struct sr6_tlv_hmac *)
+ ((char *)srh + ((srh->hdrlen + 1) << 3) - 40);
+
+ if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38)
+ return NULL;
+
+ return tlv;
+}
+
+static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id)
+{
+ struct seg6_hmac_algo *algo;
+ int i, alg_count;
+
+ alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+ for (i = 0; i < alg_count; i++) {
+ algo = &hmac_algos[i];
+ if (algo->alg_id == alg_id)
+ return algo;
+ }
+
+ return NULL;
+}
+
+static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize,
+ u8 *output, int outlen)
+{
+ struct seg6_hmac_algo *algo;
+ struct crypto_shash *tfm;
+ struct shash_desc *shash;
+ int ret, dgsize;
+
+ algo = __hmac_get_algo(hinfo->alg_id);
+ if (!algo)
+ return -ENOENT;
+
+ tfm = *this_cpu_ptr(algo->tfms);
+
+ dgsize = crypto_shash_digestsize(tfm);
+ if (dgsize > outlen) {
+ pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n",
+ dgsize, outlen);
+ return -ENOMEM;
+ }
+
+ ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen);
+ if (ret < 0) {
+ pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret);
+ goto failed;
+ }
+
+ shash = *this_cpu_ptr(algo->shashs);
+ shash->tfm = tfm;
+
+ ret = crypto_shash_digest(shash, text, psize, output);
+ if (ret < 0) {
+ pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret);
+ goto failed;
+ }
+
+ return dgsize;
+
+failed:
+ return ret;
+}
+
+int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
+ struct in6_addr *saddr, u8 *output)
+{
+ __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid);
+ u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE];
+ int plen, i, dgsize, wrsize;
+ char *ring, *off;
+
+	/* a 160-byte buffer for digest output allows storing the output of
+	 * the largest known hash function (RadioGatun), up to 1216 bits
+	 */
+
+ /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */
+ plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16;
+
+ /* this limit allows for 14 segments */
+ if (plen >= SEG6_HMAC_RING_SIZE)
+ return -EMSGSIZE;
+
+ /* Let's build the HMAC text on the ring buffer. The text is composed
+ * as follows, in order:
+ *
+ * 1. Source IPv6 address (128 bits)
+ * 2. first_segment value (8 bits)
+ * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0)
+ * 4. HMAC Key ID (32 bits)
+ * 5. All segments in the segments list (n * 128 bits)
+ */
+
+ local_bh_disable();
+ ring = *this_cpu_ptr(hmac_ring);
+ off = ring;
+
+ /* source address */
+ memcpy(off, saddr, 16);
+ off += 16;
+
+ /* first_segment value */
+ *off++ = hdr->first_segment;
+
+ /* cleanup flag */
+ *off++ = !!(sr_has_cleanup(hdr)) << 7;
+
+ /* HMAC Key ID */
+ memcpy(off, &hmackeyid, 4);
+ off += 4;
+
+ /* all segments in the list */
+ for (i = 0; i < hdr->first_segment + 1; i++) {
+ memcpy(off, hdr->segments + i, 16);
+ off += 16;
+ }
+
+ dgsize = __do_hmac(hinfo, ring, plen, tmp_out,
+ SEG6_HMAC_MAX_DIGESTSIZE);
+ local_bh_enable();
+
+ if (dgsize < 0)
+ return dgsize;
+
+ wrsize = SEG6_HMAC_FIELD_LEN;
+ if (wrsize > dgsize)
+ wrsize = dgsize;
+
+ memset(output, 0, SEG6_HMAC_FIELD_LEN);
+ memcpy(output, tmp_out, wrsize);
+
+ return 0;
+}
+EXPORT_SYMBOL(seg6_hmac_compute);
+
+/* checks if an incoming SR-enabled packet's HMAC status matches
+ * the incoming policy.
+ *
+ * called with rcu_read_lock()
+ */
+bool seg6_hmac_validate_skb(struct sk_buff *skb)
+{
+ u8 hmac_output[SEG6_HMAC_FIELD_LEN];
+ struct net *net = dev_net(skb->dev);
+ struct seg6_hmac_info *hinfo;
+ struct sr6_tlv_hmac *tlv;
+ struct ipv6_sr_hdr *srh;
+ struct inet6_dev *idev;
+
+ idev = __in6_dev_get(skb->dev);
+
+ srh = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+ tlv = seg6_get_tlv_hmac(srh);
+
+ /* mandatory check but no tlv */
+ if (idev->cnf.seg6_require_hmac > 0 && !tlv)
+ return false;
+
+ /* no check */
+ if (idev->cnf.seg6_require_hmac < 0)
+ return true;
+
+ /* check only if present */
+ if (idev->cnf.seg6_require_hmac == 0 && !tlv)
+ return true;
+
+ /* now, seg6_require_hmac >= 0 && tlv */
+
+ hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid));
+ if (!hinfo)
+ return false;
+
+ if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output))
+ return false;
+
+ if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL(seg6_hmac_validate_skb);
+
+/* called with rcu_read_lock() */
+struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key)
+{
+ struct seg6_pernet_data *sdata = seg6_pernet(net);
+ struct seg6_hmac_info *hinfo;
+
+ hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params);
+
+ return hinfo;
+}
+EXPORT_SYMBOL(seg6_hmac_info_lookup);
+
+int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo)
+{
+ struct seg6_pernet_data *sdata = seg6_pernet(net);
+ int err;
+
+ err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node,
+ rht_params);
+
+ return err;
+}
+EXPORT_SYMBOL(seg6_hmac_info_add);
+
+int seg6_hmac_info_del(struct net *net, u32 key)
+{
+ struct seg6_pernet_data *sdata = seg6_pernet(net);
+ struct seg6_hmac_info *hinfo;
+ int err = -ENOENT;
+
+ hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params);
+ if (!hinfo)
+ goto out;
+
+ err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node,
+ rht_params);
+ if (err)
+ goto out;
+
+ seg6_hinfo_release(hinfo);
+
+out:
+ return err;
+}
+EXPORT_SYMBOL(seg6_hmac_info_del);
+
+int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
+ struct ipv6_sr_hdr *srh)
+{
+ struct seg6_hmac_info *hinfo;
+ struct sr6_tlv_hmac *tlv;
+ int err = -ENOENT;
+
+ tlv = seg6_get_tlv_hmac(srh);
+ if (!tlv)
+ return -EINVAL;
+
+ rcu_read_lock();
+
+ hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid));
+ if (!hinfo)
+ goto out;
+
+ memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN);
+ err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac);
+
+out:
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(seg6_push_hmac);
+
+static int seg6_hmac_init_ring(void)
+{
+ int i;
+
+ hmac_ring = alloc_percpu(char *);
+
+ if (!hmac_ring)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL);
+
+ if (!ring)
+ return -ENOMEM;
+
+ *per_cpu_ptr(hmac_ring, i) = ring;
+ }
+
+ return 0;
+}
+
+static int seg6_hmac_init_algo(void)
+{
+ struct seg6_hmac_algo *algo;
+ struct crypto_shash *tfm;
+ struct shash_desc *shash;
+ int i, alg_count, cpu;
+
+ alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+
+ for (i = 0; i < alg_count; i++) {
+ struct crypto_shash **p_tfm;
+ int shsize;
+
+ algo = &hmac_algos[i];
+ algo->tfms = alloc_percpu(struct crypto_shash *);
+ if (!algo->tfms)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+ p_tfm = per_cpu_ptr(algo->tfms, cpu);
+ *p_tfm = tfm;
+ }
+
+ p_tfm = raw_cpu_ptr(algo->tfms);
+ tfm = *p_tfm;
+
+ shsize = sizeof(*shash) + crypto_shash_descsize(tfm);
+
+ algo->shashs = alloc_percpu(struct shash_desc *);
+ if (!algo->shashs)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ shash = kzalloc(shsize, GFP_KERNEL);
+ if (!shash)
+ return -ENOMEM;
+ *per_cpu_ptr(algo->shashs, cpu) = shash;
+ }
+ }
+
+ return 0;
+}
+
+int __init seg6_hmac_init(void)
+{
+ int ret;
+
+ ret = seg6_hmac_init_ring();
+ if (ret < 0)
+ goto out;
+
+ ret = seg6_hmac_init_algo();
+
+out:
+ return ret;
+}
+EXPORT_SYMBOL(seg6_hmac_init);
+
+int __net_init seg6_hmac_net_init(struct net *net)
+{
+ struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+ rhashtable_init(&sdata->hmac_infos, &rht_params);
+
+ return 0;
+}
+EXPORT_SYMBOL(seg6_hmac_net_init);
+
+void seg6_hmac_exit(void)
+{
+ struct seg6_hmac_algo *algo = NULL;
+ int i, alg_count, cpu;
+
+ for_each_possible_cpu(i) {
+ char *ring = *per_cpu_ptr(hmac_ring, i);
+
+ kfree(ring);
+ }
+ free_percpu(hmac_ring);
+
+ alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+ for (i = 0; i < alg_count; i++) {
+ algo = &hmac_algos[i];
+ for_each_possible_cpu(cpu) {
+ struct crypto_shash *tfm;
+ struct shash_desc *shash;
+
+ shash = *per_cpu_ptr(algo->shashs, cpu);
+ kfree(shash);
+ tfm = *per_cpu_ptr(algo->tfms, cpu);
+ crypto_free_shash(tfm);
+ }
+ free_percpu(algo->tfms);
+ free_percpu(algo->shashs);
+ }
+}
+EXPORT_SYMBOL(seg6_hmac_exit);
+
+void __net_exit seg6_hmac_net_exit(struct net *net)
+{
+ struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+ rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL);
+}
+EXPORT_SYMBOL(seg6_hmac_net_exit);
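A quick check of the HMAC input sizing in seg6_hmac_compute(): the text is saddr(16) + first_segment(1) + flags(1) + keyid(4) + 16 bytes per segment. Assuming a 256-byte ring buffer (a plausible SEG6_HMAC_RING_SIZE, not confirmed here), 14 segments give 246 bytes and 15 give 262, which matches the "14 segments" comment:

#include <stdio.h>

int main(void)
{
	unsigned int ring_size = 256;	/* assumed SEG6_HMAC_RING_SIZE */
	unsigned int nsegs;

	/* count how many 16-byte segments fit under the ring limit */
	for (nsegs = 1; 16 + 1 + 1 + 4 + nsegs * 16 < ring_size; nsegs++)
		;
	printf("max segments: %u\n", nsegs - 1);	/* prints 14 */
	return 0;
}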
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
new file mode 100644
index 000000000000..c46f8cbf5ab5
--- /dev/null
+++ b/net/ipv6/seg6_iptunnel.c
@@ -0,0 +1,436 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/lwtunnel.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include <net/ip6_fib.h>
+#include <net/route.h>
+#include <net/seg6.h>
+#include <linux/seg6.h>
+#include <linux/seg6_iptunnel.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#ifdef CONFIG_DST_CACHE
+#include <net/dst_cache.h>
+#endif
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+
+struct seg6_lwt {
+#ifdef CONFIG_DST_CACHE
+ struct dst_cache cache;
+#endif
+ struct seg6_iptunnel_encap tuninfo[0];
+};
+
+static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
+{
+ return (struct seg6_lwt *)lwt->data;
+}
+
+static inline struct seg6_iptunnel_encap *
+seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
+{
+ return seg6_lwt_lwtunnel(lwt)->tuninfo;
+}
+
+static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
+ [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY },
+};
+
+int nla_put_srh(struct sk_buff *skb, int attrtype,
+ struct seg6_iptunnel_encap *tuninfo)
+{
+ struct seg6_iptunnel_encap *data;
+ struct nlattr *nla;
+ int len;
+
+ len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
+
+ nla = nla_reserve(skb, attrtype, len);
+ if (!nla)
+ return -EMSGSIZE;
+
+ data = nla_data(nla);
+ memcpy(data, tuninfo, len);
+
+ return 0;
+}
+
+static void set_tun_src(struct net *net, struct net_device *dev,
+ struct in6_addr *daddr, struct in6_addr *saddr)
+{
+ struct seg6_pernet_data *sdata = seg6_pernet(net);
+ struct in6_addr *tun_src;
+
+ rcu_read_lock();
+
+ tun_src = rcu_dereference(sdata->tun_src);
+
+ if (!ipv6_addr_any(tun_src)) {
+ memcpy(saddr, tun_src, sizeof(struct in6_addr));
+ } else {
+ ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
+ saddr);
+ }
+
+ rcu_read_unlock();
+}
+
+/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+ struct net *net = dev_net(skb_dst(skb)->dev);
+ struct ipv6hdr *hdr, *inner_hdr;
+ struct ipv6_sr_hdr *isrh;
+ int hdrlen, tot_len, err;
+
+ hdrlen = (osrh->hdrlen + 1) << 3;
+ tot_len = hdrlen + sizeof(*hdr);
+
+ err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC);
+ if (unlikely(err))
+ return err;
+
+ inner_hdr = ipv6_hdr(skb);
+
+ skb_push(skb, tot_len);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+ hdr = ipv6_hdr(skb);
+
+ /* inherit tc, flowlabel and hlim
+ * hlim will be decremented in ip6_forward() afterwards and
+ * decapsulation will overwrite inner hlim with outer hlim
+ */
+ ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+ ip6_flowlabel(inner_hdr));
+ hdr->hop_limit = inner_hdr->hop_limit;
+ hdr->nexthdr = NEXTHDR_ROUTING;
+
+ isrh = (void *)hdr + sizeof(*hdr);
+ memcpy(isrh, osrh, hdrlen);
+
+ isrh->nexthdr = NEXTHDR_IPV6;
+
+ hdr->daddr = isrh->segments[isrh->first_segment];
+ set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (sr_has_hmac(isrh)) {
+ err = seg6_push_hmac(net, &hdr->saddr, isrh);
+ if (unlikely(err))
+ return err;
+ }
+#endif
+
+ skb_postpush_rcsum(skb, hdr, tot_len);
+
+ return 0;
+}
+
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+#ifdef CONFIG_IPV6_SEG6_INLINE
+static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+ struct ipv6hdr *hdr, *oldhdr;
+ struct ipv6_sr_hdr *isrh;
+ int hdrlen, err;
+
+ hdrlen = (osrh->hdrlen + 1) << 3;
+
+ err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC);
+ if (unlikely(err))
+ return err;
+
+ oldhdr = ipv6_hdr(skb);
+
+ skb_pull(skb, sizeof(struct ipv6hdr));
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ sizeof(struct ipv6hdr));
+
+ skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ hdr = ipv6_hdr(skb);
+
+ memmove(hdr, oldhdr, sizeof(*hdr));
+
+ isrh = (void *)hdr + sizeof(*hdr);
+ memcpy(isrh, osrh, hdrlen);
+
+ isrh->nexthdr = hdr->nexthdr;
+ hdr->nexthdr = NEXTHDR_ROUTING;
+
+ isrh->segments[0] = hdr->daddr;
+ hdr->daddr = isrh->segments[isrh->first_segment];
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (sr_has_hmac(isrh)) {
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ err = seg6_push_hmac(net, &hdr->saddr, isrh);
+ if (unlikely(err))
+ return err;
+ }
+#endif
+
+ skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
+
+ return 0;
+}
+#endif
+
+static int seg6_do_srh(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct seg6_iptunnel_encap *tinfo;
+ int err = 0;
+
+ tinfo = seg6_encap_lwtunnel(dst->lwtstate);
+
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
+ switch (tinfo->mode) {
+#ifdef CONFIG_IPV6_SEG6_INLINE
+ case SEG6_IPTUN_MODE_INLINE:
+ err = seg6_do_srh_inline(skb, tinfo->srh);
+ skb_reset_inner_headers(skb);
+ break;
+#endif
+ case SEG6_IPTUN_MODE_ENCAP:
+ err = seg6_do_srh_encap(skb, tinfo->srh);
+ break;
+ }
+
+ if (err)
+ return err;
+
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ skb_set_inner_protocol(skb, skb->protocol);
+
+ return 0;
+}
+
+int seg6_input(struct sk_buff *skb)
+{
+ int err;
+
+ err = seg6_do_srh(skb);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ skb_dst_drop(skb);
+ ip6_route_input(skb);
+
+ return dst_input(skb);
+}
+
+int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct dst_entry *dst = NULL;
+ struct seg6_lwt *slwt;
+ int err = -EINVAL;
+
+ err = seg6_do_srh(skb);
+ if (unlikely(err))
+ goto drop;
+
+ slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+#ifdef CONFIG_DST_CACHE
+ preempt_disable();
+ dst = dst_cache_get(&slwt->cache);
+ preempt_enable();
+#endif
+
+ if (unlikely(!dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
+
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ err = dst->error;
+ dst_release(dst);
+ goto drop;
+ }
+
+#ifdef CONFIG_DST_CACHE
+ preempt_disable();
+ dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+ preempt_enable();
+#endif
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ return dst_output(net, sk, skb);
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+static int seg6_build_state(struct net_device *dev, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts)
+{
+ struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
+ struct seg6_iptunnel_encap *tuninfo;
+ struct lwtunnel_state *newts;
+ int tuninfo_len, min_size;
+ struct seg6_lwt *slwt;
+ int err;
+
+ err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
+ seg6_iptunnel_policy);
+
+ if (err < 0)
+ return err;
+
+ if (!tb[SEG6_IPTUNNEL_SRH])
+ return -EINVAL;
+
+ tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
+ tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
+
+ /* tuninfo must contain at least the iptunnel encap structure,
+ * the SRH and one segment
+ */
+ min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
+ sizeof(struct in6_addr);
+ if (tuninfo_len < min_size)
+ return -EINVAL;
+
+ switch (tuninfo->mode) {
+#ifdef CONFIG_IPV6_SEG6_INLINE
+ case SEG6_IPTUN_MODE_INLINE:
+ break;
+#endif
+ case SEG6_IPTUN_MODE_ENCAP:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* verify that SRH is consistent */
+ if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
+ if (!newts)
+ return -ENOMEM;
+
+ slwt = seg6_lwt_lwtunnel(newts);
+
+#ifdef CONFIG_DST_CACHE
+ err = dst_cache_init(&slwt->cache, GFP_KERNEL);
+ if (err) {
+ kfree(newts);
+ return err;
+ }
+#endif
+
+ memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
+
+ newts->type = LWTUNNEL_ENCAP_SEG6;
+ newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
+ LWTUNNEL_STATE_INPUT_REDIRECT;
+ newts->headroom = seg6_lwt_headroom(tuninfo);
+
+ *ts = newts;
+
+ return 0;
+}
+
+#ifdef CONFIG_DST_CACHE
+static void seg6_destroy_state(struct lwtunnel_state *lwt)
+{
+ dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
+}
+#endif
+
+static int seg6_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
+
+ if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
+
+ return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
+}
+
+static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
+ struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
+ int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
+
+ if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
+ return 1;
+
+ return memcmp(a_hdr, b_hdr, len);
+}
+
+static const struct lwtunnel_encap_ops seg6_iptun_ops = {
+ .build_state = seg6_build_state,
+#ifdef CONFIG_DST_CACHE
+ .destroy_state = seg6_destroy_state,
+#endif
+ .output = seg6_output,
+ .input = seg6_input,
+ .fill_encap = seg6_fill_encap_info,
+ .get_encap_size = seg6_encap_nlsize,
+ .cmp_encap = seg6_encap_cmp,
+ .owner = THIS_MODULE,
+};
+
+int __init seg6_iptunnel_init(void)
+{
+ return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
+}
+
+void seg6_iptunnel_exit(void)
+{
+ lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
+}
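seg6_lwt above carries the variable-length encap info as a trailing zero-length array, so the dst cache and the SRH live in a single allocation (lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt))). A userspace sketch of the same layout trick, with hypothetical names:

#include <stdlib.h>
#include <string.h>

struct blob {
	size_t len;
	unsigned char data[];	/* flexible array member, like tuninfo[0] */
};

static struct blob *blob_new(const void *src, size_t len)
{
	struct blob *b = malloc(sizeof(*b) + len);	/* one allocation */

	if (!b)
		return NULL;
	b->len = len;
	memcpy(b->data, src, len);	/* payload sits right after header */
	return b;
}

int main(void)
{
	struct blob *b = blob_new("srh", 3);
	int ok = b != NULL;

	free(b);
	return ok ? 0 : 1;
}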
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b1cdf8009d29..fad992ad4bc8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -31,7 +31,7 @@
#include <linux/if_arp.h>
#include <linux/icmp.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
@@ -76,7 +76,7 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
__be32 *v4dst);
static struct rtnl_link_ops sit_link_ops __read_mostly;
-static int sit_net_id __read_mostly;
+static unsigned int sit_net_id __read_mostly;
struct sit_net {
struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
@@ -1318,23 +1318,11 @@ done:
return err;
}
-static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- int t_hlen = tunnel->hlen + sizeof(struct iphdr);
-
- if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen)
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-
static const struct net_device_ops ipip6_netdev_ops = {
.ndo_init = ipip6_tunnel_init,
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
- .ndo_change_mtu = ipip6_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
};
@@ -1365,6 +1353,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->type = ARPHRD_SIT;
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
+ dev->min_mtu = IPV6_MIN_MTU;
+ dev->max_mtu = 0xFFF8 - t_hlen;
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
dev->addr_len = 4;
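
With ipip6_tunnel_change_mtu() removed, the driver no longer range-checks MTU changes itself; it only publishes min_mtu/max_mtu and the networking core enforces them on every MTU change. A minimal sketch of the bounds check that moves out of the driver (the helper below is a stand-in, not the core's actual function):

    #include <stdbool.h>

    static bool mtu_in_bounds(unsigned int new_mtu,
                              unsigned int min_mtu, unsigned int max_mtu)
    {
        return new_mtu >= min_mtu && new_mtu <= max_mtu;
    }

    int main(void)
    {
        unsigned int t_hlen = 20; /* assumed tunnel header size */
        /* mirrors the removed check: IPV6_MIN_MTU .. 0xFFF8 - t_hlen */
        return !mtu_in_bounds(1480, 1280, 0xFFF8 - t_hlen);
    }
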
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 59c483937aec..a4d49760bf43 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -209,6 +209,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
treq->snt_synack.v64 = 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
+ treq->ts_off = 0;
/*
* We need to lookup the dst_entry to get the correct window size.
@@ -227,6 +228,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
fl6.flowi6_mark = ireq->ir_mark;
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
+ fl6.flowi6_uid = sk->sk_uid;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
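
The flowi6_uid assignments added throughout this series record the owning socket's uid in the flow key, so routing decisions can take the uid into account (for example, uid-range policy rules). A toy predicate for such a rule match (the helper name is hypothetical):

    #include <stdbool.h>
    #include <stdint.h>

    /* hypothetical predicate mirroring a uid-range routing rule */
    static bool uid_rule_matches(uint32_t flow_uid, uint32_t lo, uint32_t hi)
    {
        return flow_uid >= lo && flow_uid <= hi;
    }

    int main(void)
    {
        return uid_rule_matches(1000, 1000, 1100) ? 0 : 1;
    }
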
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b9f1fee9a886..cb8929681dc7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
}
}
-static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
+static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
{
return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32,
tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source);
+ tcp_hdr(skb)->source, tsoff);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -233,6 +233,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
+ fl6.flowi6_uid = sk->sk_uid;
opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
@@ -282,7 +283,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
- inet->inet_dport);
+ inet->inet_dport,
+ &tp->tsoffset);
err = tcp_connect(sk);
if (err)
@@ -397,7 +399,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
- &tp->tsq_flags))
+ &sk->sk_tsq_flags))
sock_hold(sk);
goto out;
}
@@ -467,7 +469,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -828,6 +830,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
+ fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
/* Pass a socket to ip6_dst_lookup either it is for RST
@@ -837,7 +840,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
- ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
+ ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -954,7 +957,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+ tcp_time_stamp + tcp_rsk(req)->ts_off,
+ req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e4a8000d59ad..4d5c4eee4b3f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -35,7 +35,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
@@ -302,7 +302,8 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
- IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
+ IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
@@ -334,7 +335,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
int is_udp4;
- bool slow;
if (flags & MSG_ERRQUEUE)
return ipv6_recv_error(sk, msg, len, addr_len);
@@ -344,8 +344,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
try_again:
peeking = off = sk_peek_offset(sk, flags);
- skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
- &peeked, &off, &err);
+ skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
if (!skb)
return err;
@@ -364,7 +363,8 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy.
*/
- if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
+ if (copied < ulen || peeking ||
+ (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid)
goto csum_copy_err;
@@ -378,7 +378,6 @@ try_again:
goto csum_copy_err;
}
if (unlikely(err)) {
- trace_kfree_skb(skb, udpv6_recvmsg);
if (!peeked) {
atomic_inc(&sk->sk_drops);
if (is_udp4)
@@ -388,7 +387,7 @@ try_again:
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
is_udplite);
}
- skb_free_datagram_locked(sk, skb);
+ kfree_skb(skb);
return err;
}
if (!peeked) {
@@ -427,7 +426,7 @@ try_again:
if (is_udp4) {
if (inet->cmsg_flags)
- ip_cmsg_recv_offset(msg, skb,
+ ip_cmsg_recv_offset(msg, sk, skb,
sizeof(struct udphdr), off);
} else {
if (np->rxopt.all)
@@ -438,12 +437,11 @@ try_again:
if (flags & MSG_TRUNC)
err = ulen;
- __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
+ skb_consume_udp(sk, skb, peeking ? -err : err);
return err;
csum_copy_err:
- slow = lock_sock_fast(sk);
- if (!skb_kill_datagram(sk, skb, flags)) {
+ if (!__sk_queue_drop_skb(sk, skb, flags)) {
if (is_udp4) {
UDP_INC_STATS(sock_net(sk),
UDP_MIB_CSUMERRORS, is_udplite);
@@ -456,7 +454,7 @@ csum_copy_err:
UDP_MIB_INERRORS, is_udplite);
}
}
- unlock_sock_fast(sk, slow);
+ kfree_skb(skb);
/* starting over for a new packet, but check if we need to yield */
cond_resched();
@@ -522,9 +520,11 @@ int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
sk_incoming_cpu_update(sk);
+ } else {
+ sk_mark_napi_id_once(sk, skb);
}
- rc = __sock_queue_rcv_skb(sk, skb);
+ rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -536,6 +536,7 @@ int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return -1;
}
+
return 0;
}
@@ -557,7 +558,6 @@ EXPORT_SYMBOL(udpv6_encap_enable);
int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
- int rc;
int is_udplite = IS_UDPLITE(sk);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -623,25 +623,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto drop;
udp_csum_pull_header(skb);
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
- __UDP6_INC_STATS(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, is_udplite);
- goto drop;
- }
skb_dst_drop(skb);
- bh_lock_sock(sk);
- rc = 0;
- if (!sock_owned_by_user(sk))
- rc = __udpv6_queue_rcv_skb(sk, skb);
- else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
- bh_unlock_sock(sk);
- goto drop;
- }
- bh_unlock_sock(sk);
-
- return rc;
+ return __udpv6_queue_rcv_skb(sk, skb);
csum_error:
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
@@ -1156,6 +1141,7 @@ do_udp_sendmsg:
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_uid = sk->sk_uid;
sockc.tsflags = sk->sk_tsflags;
if (msg->msg_controllen) {
@@ -1434,12 +1420,12 @@ struct proto udpv6_prot = {
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .init = udp_init_sock,
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
- .backlog_rcv = __udpv6_queue_rcv_skb,
.release_cb = ip6_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
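
The udpv6 hunks above drop the socket-lock/backlog path entirely: __udp_enqueue_schedule_skb() admits packets to the receive queue against an atomically charged rmem budget, and the error paths free skbs directly with kfree_skb(). A toy model of that lockless admission check (field sizes and names are assumptions):

    #include <stdatomic.h>
    #include <stdio.h>

    struct sk { atomic_int rmem_alloc; int rcvbuf; };

    static int udp_enqueue(struct sk *sk, int truesize)
    {
        int rmem = atomic_fetch_add(&sk->rmem_alloc, truesize) + truesize;
        if (rmem > sk->rcvbuf) {
            atomic_fetch_sub(&sk->rmem_alloc, truesize);
            return -1; /* rcvbuf full: drop and count RCVBUFERRORS */
        }
        return 0; /* caller links the skb and wakes the reader */
    }

    int main(void)
    {
        struct sk s = { 0, 4096 };
        printf("%d %d\n", udp_enqueue(&s, 3000), udp_enqueue(&s, 3000));
        return 0;
    }
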
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 2f5101a12283..2784cc363f2b 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -45,10 +45,11 @@ struct proto udplitev6_prot = {
.getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
- .backlog_rcv = __udpv6_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index e1c0bbe7996c..d7b731a78d09 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -44,7 +44,7 @@ struct xfrm6_tunnel_net {
u32 spi;
};
-static int xfrm6_tunnel_net_id __read_mostly;
+static unsigned int xfrm6_tunnel_net_id __read_mostly;
static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net)
{
return net_generic(net, xfrm6_tunnel_net_id);
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 48d0dc89b58d..8a9219ff2e77 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -56,7 +56,7 @@
#include <net/tcp_states.h>
#include <net/net_namespace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
/* Configuration Variables */
static unsigned char ipxcfg_max_hops = 16;
@@ -1809,7 +1809,7 @@ static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
rc = skb_copy_datagram_msg(skb, sizeof(struct ipxhdr), msg, copied);
if (rc)
goto out_free;
- if (skb->tstamp.tv64)
+ if (skb->tstamp)
sk->sk_stamp = skb->tstamp;
if (sipx) {
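
The tstamp.tv64 change reflects ktime_t becoming a plain 64-bit scalar instead of a single-member union, so the raw value is tested and assigned directly. A compilable illustration of the before/after types (the type names here are illustrative):

    #include <stdio.h>

    typedef union { long long tv64; } ktime_old; /* old: access via .tv64 */
    typedef long long ktime_new;                 /* new: just a scalar    */

    int main(void)
    {
        ktime_old a = { .tv64 = 42 };
        ktime_new b = 42;
        /* old code: if (a.tv64) ...   new code: if (b) ... */
        printf("%d %d\n", a.tv64 != 0, b != 0);
        return 0;
    }
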
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 391c3cbd2eed..ab254041dab7 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -52,7 +52,7 @@
#include <linux/poll.h>
#include <asm/ioctls.h> /* TIOCOUTQ, TIOCINQ */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/sock.h>
#include <net/tcp_states.h>
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 873c4b707d6a..817b1b186aff 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -40,7 +40,7 @@
#include <linux/interrupt.h>
#include <linux/device.h> /* for MODULE_ALIAS_CHARDEV_MAJOR */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/irda/irda.h>
#include <net/irda/irmod.h>
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 8f5678cb6263..f18070118d05 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -32,7 +32,7 @@
#include <linux/tty.h>
#include <linux/serial.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/irda/irda.h>
#include <net/irda/irmod.h>
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 856736656a30..890b90d055d5 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -43,7 +43,7 @@
#include <linux/export.h>
#include <asm/ioctls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/dma.h>
#include <asm/io.h>
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index d8b7267280c3..74d09f91709e 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -51,7 +51,6 @@ static const struct net_device_ops irlan_eth_netdev_ops = {
.ndo_stop = irlan_eth_close,
.ndo_start_xmit = irlan_eth_xmit,
.ndo_set_rx_mode = irlan_eth_set_multicast_list,
- .ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
};
@@ -67,7 +66,8 @@ static void irlan_eth_setup(struct net_device *dev)
dev->netdev_ops = &irlan_eth_netdev_ops;
dev->destructor = free_netdev;
-
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
/*
* Lets do all queueing in IrTTP instead of this device driver.
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 8d65bb9477fc..9d451f8ed47a 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -245,12 +245,11 @@
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/netdevice.h>
-#include <linux/miscdevice.h>
#include <linux/poll.h>
#include <linux/capability.h>
#include <linux/ctype.h> /* isspace() */
#include <linux/string.h> /* skip_spaces() */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/init.h>
#include <linux/ppp_defs.h>
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
index 940225866da0..32061442cc8e 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/net/irda/irnet/irnet_ppp.h
@@ -15,13 +15,10 @@
/***************************** INCLUDES *****************************/
#include "irnet.h" /* Module global include */
+#include <linux/miscdevice.h>
/************************ CONSTANTS & MACROS ************************/
-/* /dev/irnet file constants */
-#define IRNET_MAJOR 10 /* Misc range */
-#define IRNET_MINOR 187 /* Official allocation */
-
/* IrNET control channel stuff */
#define IRNET_MAX_COMMAND 256 /* Max length of a command line */
@@ -111,9 +108,9 @@ static const struct file_operations irnet_device_fops =
/* Structure so that the misc major (drivers/char/misc.c) take care of us... */
static struct miscdevice irnet_misc_device =
{
- IRNET_MINOR,
- "irnet",
- &irnet_device_fops
+ .minor = IRNET_MINOR,
+ .name = "irnet",
+ .fops = &irnet_device_fops
};
#endif /* IRNET_PPP_H */
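
The miscdevice initializer above switches from positional to designated form, which keeps the initialization correct even if the structure's members are ever reordered. A generic sketch of the two forms (struct and values modeled on the irnet_misc_device above):

    #include <stddef.h>

    struct miscdev { int minor; const char *name; const void *fops; };

    /* positional form depends on member order; designated form names
     * each field explicitly */
    static struct miscdev positional = { 187, "irnet", NULL };
    static struct miscdev designated = {
        .minor = 187,
        .name  = "irnet",
        .fops  = NULL,
    };

    int main(void)
    {
        return designated.minor - positional.minor; /* 0: same contents */
    }
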
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index e15c40e86660..7fc340e574cf 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -24,13 +24,7 @@
-static struct genl_family irda_nl_family = {
- .id = GENL_ID_GENERATE,
- .name = IRDA_NL_NAME,
- .hdrsize = 0,
- .version = IRDA_NL_VERSION,
- .maxattr = IRDA_NL_CMD_MAX,
-};
+static struct genl_family irda_nl_family;
static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info)
{
@@ -147,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = {
};
-int irda_nl_register(void)
+static struct genl_family irda_nl_family __ro_after_init = {
+ .name = IRDA_NL_NAME,
+ .hdrsize = 0,
+ .version = IRDA_NL_VERSION,
+ .maxattr = IRDA_NL_CMD_MAX,
+ .module = THIS_MODULE,
+ .ops = irda_nl_ops,
+ .n_ops = ARRAY_SIZE(irda_nl_ops),
+};
+
+int __init irda_nl_register(void)
{
- return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops);
+ return genl_register_family(&irda_nl_family);
}
void irda_nl_unregister(void)
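
The genetlink conversions in this series move the ops array and module owner into the family itself and register with plain genl_register_family(); the family definition therefore has to follow the ops table it references, which is why a forward declaration replaces the old definition at the top of the file. A generic sketch of that ordering (types simplified):

    struct genl_op_s { int cmd; };

    struct genl_family_s {
        const char *name;
        const struct genl_op_s *ops;
        unsigned int n_ops;
    };

    static const struct genl_op_s my_ops[] = {
        { .cmd = 1 },
        { .cmd = 2 },
    };

    /* the definition follows the ops array it references */
    static struct genl_family_s my_family = {
        .name  = "example",
        .ops   = my_ops,
        .n_ops = sizeof(my_ops) / sizeof(my_ops[0]),
    };

    int main(void) { return (int)my_family.n_ops - 2; }
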
diff --git a/net/irda/irproc.c b/net/irda/irproc.c
index b9ac598e2116..77cfdde9d82f 100644
--- a/net/irda/irproc.c
+++ b/net/irda/irproc.c
@@ -23,7 +23,6 @@
*
********************************************************************/
-#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/module.h>
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 02b45a8e8b35..13190b38f22e 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -453,19 +453,27 @@ static void iucv_sever_path(struct sock *sk, int with_user_data)
}
}
-/* Send FIN through an IUCV socket for HIPER transport */
+/* Send controlling flags through an IUCV socket for HIPER transport */
static int iucv_send_ctrl(struct sock *sk, u8 flags)
{
int err = 0;
int blen;
struct sk_buff *skb;
+ u8 shutdown = 0;
blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+ if (sk->sk_shutdown & SEND_SHUTDOWN) {
+ /* controlling flags should be sent anyway */
+ shutdown = sk->sk_shutdown;
+ sk->sk_shutdown &= RCV_SHUTDOWN;
+ }
skb = sock_alloc_send_skb(sk, blen, 1, &err);
if (skb) {
skb_reserve(skb, blen);
err = afiucv_hs_send(NULL, sk, skb, flags);
}
+ if (shutdown)
+ sk->sk_shutdown = shutdown;
return err;
}
@@ -1036,7 +1044,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
- size_t headroom, linear;
+ size_t headroom = 0;
+ size_t linear;
struct sk_buff *skb;
struct iucv_message txmsg = {0};
struct cmsghdr *cmsg;
@@ -1114,18 +1123,20 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
* this is fine for SOCK_SEQPACKET (unless we want to support
* segmented records using the MSG_EOR flag), but
* for SOCK_STREAM we might want to improve it in future */
- headroom = (iucv->transport == AF_IUCV_TRANS_HIPER)
- ? sizeof(struct af_iucv_trans_hdr) + ETH_HLEN : 0;
- if (headroom + len < PAGE_SIZE) {
+ if (iucv->transport == AF_IUCV_TRANS_HIPER) {
+ headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
linear = len;
} else {
- /* In nonlinear "classic" iucv skb,
- * reserve space for iucv_array
- */
- if (iucv->transport != AF_IUCV_TRANS_HIPER)
- headroom += sizeof(struct iucv_array) *
- (MAX_SKB_FRAGS + 1);
- linear = PAGE_SIZE - headroom;
+ if (len < PAGE_SIZE) {
+ linear = len;
+ } else {
+ /* In nonlinear "classic" iucv skb,
+ * reserve space for iucv_array
+ */
+ headroom = sizeof(struct iucv_array) *
+ (MAX_SKB_FRAGS + 1);
+ linear = PAGE_SIZE - headroom;
+ }
}
skb = sock_alloc_send_pskb(sk, headroom + linear, len - linear,
noblock, &err, 0);
@@ -1315,8 +1326,13 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
}
IUCV_SKB_CB(skb)->offset = 0;
- if (sock_queue_rcv_skb(sk, skb))
- skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb);
+ if (sk_filter(sk, skb)) {
+ atomic_inc(&sk->sk_drops); /* skb rejected by filter */
+ kfree_skb(skb);
+ return;
+ }
+ if (__sock_queue_rcv_skb(sk, skb)) /* handle rcv queue full */
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
}
/* iucv_process_message_q() - Process outstanding IUCV messages
@@ -1430,13 +1446,13 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
rskb = skb_dequeue(&iucv->backlog_skb_q);
while (rskb) {
IUCV_SKB_CB(rskb)->offset = 0;
- if (sock_queue_rcv_skb(sk, rskb)) {
+ if (__sock_queue_rcv_skb(sk, rskb)) {
+ /* handle rcv queue full */
skb_queue_head(&iucv->backlog_skb_q,
rskb);
break;
- } else {
- rskb = skb_dequeue(&iucv->backlog_skb_q);
}
+ rskb = skb_dequeue(&iucv->backlog_skb_q);
}
if (skb_queue_empty(&iucv->backlog_skb_q)) {
if (!list_empty(&iucv->message_q.list))
@@ -2116,12 +2132,17 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
skb_reset_transport_header(skb);
skb_reset_network_header(skb);
IUCV_SKB_CB(skb)->offset = 0;
+ if (sk_filter(sk, skb)) {
+ atomic_inc(&sk->sk_drops); /* skb rejected by filter */
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
spin_lock(&iucv->message_q.lock);
if (skb_queue_empty(&iucv->backlog_skb_q)) {
- if (sock_queue_rcv_skb(sk, skb)) {
+ if (__sock_queue_rcv_skb(sk, skb))
/* handle rcv queue full */
skb_queue_tail(&iucv->backlog_skb_q, skb);
- }
} else
skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
spin_unlock(&iucv->message_q.lock);
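
In the af_iucv receive paths above, sk_filter() now runs before queueing, so a packet rejected by a socket filter is dropped and counted instead of landing in the backlog; only a genuinely full receive queue defers to backlog_skb_q. A toy model of that decision order (stand-in helpers, not the kernel API):

    #include <stdio.h>
    #include <stdbool.h>

    static bool sk_filter_ok(int len) { return len > 0; } /* stand-in */

    static const char *rx(int len, bool queue_full)
    {
        if (!sk_filter_ok(len))
            return "drop (filter)";     /* counted in sk_drops */
        return queue_full ? "backlog" : "receive queue";
    }

    int main(void)
    {
        printf("%s\n", rx(0, false));
        printf("%s\n", rx(100, true));
        return 0;
    }
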
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 88a2a3ba4212..8f7ef167c45a 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -639,7 +639,7 @@ static void iucv_disable(void)
put_online_cpus();
}
-static void free_iucv_data(int cpu)
+static int iucv_cpu_dead(unsigned int cpu)
{
kfree(iucv_param_irq[cpu]);
iucv_param_irq[cpu] = NULL;
@@ -647,9 +647,10 @@ static void free_iucv_data(int cpu)
iucv_param[cpu] = NULL;
kfree(iucv_irq_data[cpu]);
iucv_irq_data[cpu] = NULL;
+ return 0;
}
-static int alloc_iucv_data(int cpu)
+static int iucv_cpu_prepare(unsigned int cpu)
{
/* Note: GFP_DMA used to get memory below 2G */
iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
@@ -671,58 +672,38 @@ static int alloc_iucv_data(int cpu)
return 0;
out_free:
- free_iucv_data(cpu);
+ iucv_cpu_dead(cpu);
return -ENOMEM;
}
-static int iucv_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int iucv_cpu_online(unsigned int cpu)
{
- cpumask_t cpumask;
- long cpu = (long) hcpu;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- if (alloc_iucv_data(cpu))
- return notifier_from_errno(-ENOMEM);
- break;
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- free_iucv_data(cpu);
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- if (!iucv_path_table)
- break;
- smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
- break;
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- if (!iucv_path_table)
- break;
- cpumask_copy(&cpumask, &iucv_buffer_cpumask);
- cpumask_clear_cpu(cpu, &cpumask);
- if (cpumask_empty(&cpumask))
- /* Can't offline last IUCV enabled cpu. */
- return notifier_from_errno(-EINVAL);
- smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
- if (cpumask_empty(&iucv_irq_cpumask))
- smp_call_function_single(
- cpumask_first(&iucv_buffer_cpumask),
- iucv_allow_cpu, NULL, 1);
- break;
- }
- return NOTIFY_OK;
+ if (!iucv_path_table)
+ return 0;
+ iucv_declare_cpu(NULL);
+ return 0;
}
-static struct notifier_block __refdata iucv_cpu_notifier = {
- .notifier_call = iucv_cpu_notify,
-};
+static int iucv_cpu_down_prep(unsigned int cpu)
+{
+ cpumask_t cpumask;
+
+ if (!iucv_path_table)
+ return 0;
+
+ cpumask_copy(&cpumask, &iucv_buffer_cpumask);
+ cpumask_clear_cpu(cpu, &cpumask);
+ if (cpumask_empty(&cpumask))
+ /* Can't offline last IUCV enabled cpu. */
+ return -EINVAL;
+
+ iucv_retrieve_cpu(NULL);
+ if (!cpumask_empty(&iucv_irq_cpumask))
+ return 0;
+ smp_call_function_single(cpumask_first(&iucv_buffer_cpumask),
+ iucv_allow_cpu, NULL, 1);
+ return 0;
+}
/**
* iucv_sever_pathid
@@ -2027,6 +2008,7 @@ struct iucv_interface iucv_if = {
};
EXPORT_SYMBOL(iucv_if);
+static enum cpuhp_state iucv_online;
/**
* iucv_init
*
@@ -2035,7 +2017,6 @@ EXPORT_SYMBOL(iucv_if);
static int __init iucv_init(void)
{
int rc;
- int cpu;
if (!MACHINE_IS_VM) {
rc = -EPROTONOSUPPORT;
@@ -2054,23 +2035,19 @@ static int __init iucv_init(void)
goto out_int;
}
- cpu_notifier_register_begin();
-
- for_each_online_cpu(cpu) {
- if (alloc_iucv_data(cpu)) {
- rc = -ENOMEM;
- goto out_free;
- }
- }
- rc = __register_hotcpu_notifier(&iucv_cpu_notifier);
+ rc = cpuhp_setup_state(CPUHP_NET_IUCV_PREPARE, "net/iucv:prepare",
+ iucv_cpu_prepare, iucv_cpu_dead);
if (rc)
- goto out_free;
-
- cpu_notifier_register_done();
+ goto out_dev;
+ rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "net/iucv:online",
+ iucv_cpu_online, iucv_cpu_down_prep);
+ if (rc < 0)
+ goto out_prep;
+ iucv_online = rc;
rc = register_reboot_notifier(&iucv_reboot_notifier);
if (rc)
- goto out_cpu;
+ goto out_remove_hp;
ASCEBC(iucv_error_no_listener, 16);
ASCEBC(iucv_error_no_memory, 16);
ASCEBC(iucv_error_pathid, 16);
@@ -2084,15 +2061,11 @@ static int __init iucv_init(void)
out_reboot:
unregister_reboot_notifier(&iucv_reboot_notifier);
-out_cpu:
- cpu_notifier_register_begin();
- __unregister_hotcpu_notifier(&iucv_cpu_notifier);
-out_free:
- for_each_possible_cpu(cpu)
- free_iucv_data(cpu);
-
- cpu_notifier_register_done();
-
+out_remove_hp:
+ cpuhp_remove_state(iucv_online);
+out_prep:
+ cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
+out_dev:
root_device_unregister(iucv_root);
out_int:
unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
@@ -2110,7 +2083,6 @@ out:
static void __exit iucv_exit(void)
{
struct iucv_irq_list *p, *n;
- int cpu;
spin_lock_irq(&iucv_queue_lock);
list_for_each_entry_safe(p, n, &iucv_task_queue, list)
@@ -2119,11 +2091,9 @@ static void __exit iucv_exit(void)
kfree(p);
spin_unlock_irq(&iucv_queue_lock);
unregister_reboot_notifier(&iucv_reboot_notifier);
- cpu_notifier_register_begin();
- __unregister_hotcpu_notifier(&iucv_cpu_notifier);
- for_each_possible_cpu(cpu)
- free_iucv_data(cpu);
- cpu_notifier_register_done();
+
+ cpuhp_remove_state_nocalls(iucv_online);
+ cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
root_device_unregister(iucv_root);
bus_unregister(&iucv_bus);
unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
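
The iucv notifier rewrite is a conversion to the cpuhp state machine: one PREPARE state pairs allocation (iucv_cpu_prepare) with teardown (iucv_cpu_dead), and one dynamic online state pairs iucv_cpu_online with iucv_cpu_down_prep. A toy walk through the callback order the hotplug core would use for one cpu (userspace stand-ins, simplified signatures):

    #include <stdio.h>

    static int prepare(unsigned int cpu) { printf("alloc cpu%u\n", cpu);    return 0; }
    static int dead(unsigned int cpu)    { printf("free cpu%u\n", cpu);     return 0; }
    static int online(unsigned int cpu)  { printf("declare cpu%u\n", cpu);  return 0; }
    static int down(unsigned int cpu)    { printf("retrieve cpu%u\n", cpu); return 0; }

    int main(void)
    {
        /* bring one cpu up and down through the four hooks, in the
         * order the hotplug core would invoke them */
        prepare(0); online(0); down(0); dead(0);
        return 0;
    }
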
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f9c9ecb0cdd3..c6252ed42c1d 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -36,7 +36,7 @@
#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
-static int pfkey_net_id __read_mostly;
+static unsigned int pfkey_net_id __read_mostly;
struct netns_pfkey {
/* List of all pfkey sockets. */
struct hlist_head table;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a2ed3bda4ddc..85948c69b236 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -715,7 +715,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
l2tp_info(session, L2TP_MSG_SEQ,
"%s: requested to enable seq numbers by LNS\n",
session->name);
- session->send_seq = -1;
+ session->send_seq = 1;
l2tp_session_set_header_len(session, tunnel->version);
}
} else {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 2599af6378e4..8f560f7140a0 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -23,16 +23,6 @@
#define L2TP_HASH_BITS_2 8
#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2)
-/* Debug message categories for the DEBUG socket option */
-enum {
- L2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if
- * compiled in) */
- L2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel
- * interface */
- L2TP_MSG_SEQ = (1 << 2), /* sequence numbers */
- L2TP_MSG_DATA = (1 << 3), /* data packets */
-};
-
struct sk_buff;
struct l2tp_stats {
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 965f7e344cef..e2c6ae024565 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -259,6 +259,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
session->mtu = dev->mtu - session->hdr_len;
dev->mtu = session->mtu;
dev->needed_headroom += session->hdr_len;
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
priv = netdev_priv(dev);
priv->dev = dev;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 8938b6ba57a0..3d73278b86ca 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -47,7 +47,8 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
return (struct l2tp_ip_sock *)sk;
}
-static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
+ __be32 raddr, int dif, u32 tunnel_id)
{
struct sock *sk;
@@ -61,6 +62,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif
if ((l2tp->conn_id == tunnel_id) &&
net_eq(sock_net(sk), net) &&
!(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+ (!inet->inet_daddr || !raddr || inet->inet_daddr == raddr) &&
(!sk->sk_bound_dev_if || !dif ||
sk->sk_bound_dev_if == dif))
goto found;
@@ -71,15 +73,6 @@ found:
return sk;
}
-static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
-{
- struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
- if (sk)
- sock_hold(sk);
-
- return sk;
-}
-
/* When processing receive frames, there are two cases to
* consider. Data frames consist of a non-zero session-id and an
* optional cookie. Control frames consist of a regular L2TP header
@@ -183,8 +176,8 @@ pass_up:
struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
read_lock_bh(&l2tp_ip_lock);
- sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb),
- tunnel_id);
+ sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
+ inet_iif(skb), tunnel_id);
if (!sk) {
read_unlock_bh(&l2tp_ip_lock);
goto discard;
@@ -280,7 +273,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
write_lock_bh(&l2tp_ip_lock);
- if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
+ if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0,
sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
write_unlock_bh(&l2tp_ip_lock);
ret = -EADDRINUSE;
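
Extending __l2tp_ip_bind_lookup() with the peer address means a connected L2TP/IP socket only matches packets from the peer it is connected to, while 0 still acts as a wildcard (the bind path above passes 0). A standalone sketch of the match rule on IPv4-style addresses (helper name assumed):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool l2tp_match(uint32_t sk_laddr, uint32_t sk_raddr,
                           uint32_t laddr, uint32_t raddr)
    {
        if (sk_laddr && sk_laddr != laddr)
            return false;               /* bound to a different local addr */
        if (sk_raddr && raddr && sk_raddr != raddr)
            return false;               /* connected to a different peer */
        return true;
    }

    int main(void)
    {
        /* a connected socket no longer steals another peer's packet */
        printf("%d\n", l2tp_match(0x0a000001, 0x0a000002,
                                  0x0a000001, 0x0a000003));
        return 0;
    }
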
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index aa821cb639e5..331ccf5a7bad 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -59,12 +59,14 @@ static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
struct in6_addr *laddr,
+ const struct in6_addr *raddr,
int dif, u32 tunnel_id)
{
struct sock *sk;
sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
- const struct in6_addr *addr = inet6_rcv_saddr(sk);
+ const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
+ const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
if (l2tp == NULL)
@@ -72,7 +74,8 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
if ((l2tp->conn_id == tunnel_id) &&
net_eq(sock_net(sk), net) &&
- (!addr || ipv6_addr_equal(addr, laddr)) &&
+ (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) &&
+ (!raddr || ipv6_addr_any(sk_raddr) || ipv6_addr_equal(sk_raddr, raddr)) &&
(!sk->sk_bound_dev_if || !dif ||
sk->sk_bound_dev_if == dif))
goto found;
@@ -83,17 +86,6 @@ found:
return sk;
}
-static inline struct sock *l2tp_ip6_bind_lookup(struct net *net,
- struct in6_addr *laddr,
- int dif, u32 tunnel_id)
-{
- struct sock *sk = __l2tp_ip6_bind_lookup(net, laddr, dif, tunnel_id);
- if (sk)
- sock_hold(sk);
-
- return sk;
-}
-
/* When processing receive frames, there are two cases to
* consider. Data frames consist of a non-zero session-id and an
* optional cookie. Control frames consist of a regular L2TP header
@@ -197,8 +189,8 @@ pass_up:
struct ipv6hdr *iph = ipv6_hdr(skb);
read_lock_bh(&l2tp_ip6_lock);
- sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb),
- tunnel_id);
+ sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+ inet6_iif(skb), tunnel_id);
if (!sk) {
read_unlock_bh(&l2tp_ip6_lock);
goto discard;
@@ -330,7 +322,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
rcu_read_unlock();
write_lock_bh(&l2tp_ip6_lock);
- if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if,
+ if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if,
addr->l2tp_conn_id)) {
write_unlock_bh(&l2tp_ip6_lock);
err = -EADDRINUSE;
@@ -525,6 +517,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_uid = sk->sk_uid;
ipc6.hlimit = -1;
ipc6.tclass = -1;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bf3117771822..3620fba31786 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -31,14 +31,7 @@
#include "l2tp_core.h"
-static struct genl_family l2tp_nl_family = {
- .id = GENL_ID_GENERATE,
- .name = L2TP_GENL_NAME,
- .version = L2TP_GENL_VERSION,
- .hdrsize = 0,
- .maxattr = L2TP_ATTR_MAX,
- .netnsok = true,
-};
+static struct genl_family l2tp_nl_family;
static const struct genl_multicast_group l2tp_multicast_group[] = {
{
@@ -227,14 +220,14 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
if (info->attrs[L2TP_ATTR_UDP_DPORT])
cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
- if (info->attrs[L2TP_ATTR_UDP_CSUM])
- cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
+ cfg.use_udp_checksums = nla_get_flag(
+ info->attrs[L2TP_ATTR_UDP_CSUM]);
#if IS_ENABLED(CONFIG_IPV6)
- if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX])
- cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
- if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX])
- cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
+ cfg.udp6_zero_tx_checksums = nla_get_flag(
+ info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
+ cfg.udp6_zero_rx_checksums = nla_get_flag(
+ info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
#endif
}
@@ -386,9 +379,24 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
switch (tunnel->encap) {
case L2TP_ENCAPTYPE_UDP:
+ switch (sk->sk_family) {
+ case AF_INET:
+ if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
+ goto nla_put_failure;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ if (udp_get_no_check6_tx(sk) &&
+ nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX))
+ goto nla_put_failure;
+ if (udp_get_no_check6_rx(sk) &&
+ nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX))
+ goto nla_put_failure;
+ break;
+#endif
+ }
if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
- nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) ||
- nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
+ nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
goto nla_put_failure;
/* NOBREAK */
case L2TP_ENCAPTYPE_IP:
@@ -977,6 +985,19 @@ static const struct genl_ops l2tp_nl_ops[] = {
},
};
+static struct genl_family l2tp_nl_family __ro_after_init = {
+ .name = L2TP_GENL_NAME,
+ .version = L2TP_GENL_VERSION,
+ .hdrsize = 0,
+ .maxattr = L2TP_ATTR_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = l2tp_nl_ops,
+ .n_ops = ARRAY_SIZE(l2tp_nl_ops),
+ .mcgrps = l2tp_multicast_group,
+ .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group),
+};
+
int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
{
int ret;
@@ -1010,12 +1031,10 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
}
EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
-static int l2tp_nl_init(void)
+static int __init l2tp_nl_init(void)
{
pr_info("L2TP netlink interface\n");
- return genl_register_family_with_ops_groups(&l2tp_nl_family,
- l2tp_nl_ops,
- l2tp_multicast_group);
+ return genl_register_family(&l2tp_nl_family);
}
static void l2tp_nl_cleanup(void)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 41d47bfda15c..36cc56fd0418 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -231,14 +231,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
if (sk->sk_state & PPPOX_BOUND) {
struct pppox_sock *po;
- l2tp_dbg(session, PPPOL2TP_MSG_DATA,
+ l2tp_dbg(session, L2TP_MSG_DATA,
"%s: recv %d byte data frame, passing to ppp\n",
session->name, data_len);
po = pppox_sk(sk);
ppp_input(&po->chan, skb);
} else {
- l2tp_dbg(session, PPPOL2TP_MSG_DATA,
+ l2tp_dbg(session, L2TP_MSG_DATA,
"%s: recv %d byte data frame, passing to L2TP socket\n",
session->name, data_len);
@@ -251,7 +251,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
return;
no_sock:
- l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: no socket\n", session->name);
+ l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
kfree_skb(skb);
}
@@ -773,7 +773,7 @@ out_no_ppp:
/* This is how we get the session context from the socket. */
sk->sk_user_data = session;
sk->sk_state = PPPOX_CONNECTED;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: created\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
session->name);
end:
@@ -827,7 +827,7 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
ps = l2tp_session_priv(session);
ps->tunnel_sock = tunnel->sock;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: created\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
session->name);
error = 0;
@@ -989,7 +989,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
struct l2tp_tunnel *tunnel = session->tunnel;
struct pppol2tp_ioc_stats stats;
- l2tp_dbg(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_dbg(session, L2TP_MSG_CONTROL,
"%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
session->name, cmd, arg);
@@ -1009,7 +1009,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
break;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get mtu=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mtu=%d\n",
session->name, session->mtu);
err = 0;
break;
@@ -1025,7 +1025,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
session->mtu = ifr.ifr_mtu;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set mtu=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mtu=%d\n",
session->name, session->mtu);
err = 0;
break;
@@ -1039,7 +1039,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (put_user(session->mru, (int __user *) arg))
break;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get mru=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mru=%d\n",
session->name, session->mru);
err = 0;
break;
@@ -1054,7 +1054,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
break;
session->mru = val;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set mru=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mru=%d\n",
session->name, session->mru);
err = 0;
break;
@@ -1064,7 +1064,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (put_user(ps->flags, (int __user *) arg))
break;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get flags=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get flags=%d\n",
session->name, ps->flags);
err = 0;
break;
@@ -1074,7 +1074,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (get_user(val, (int __user *) arg))
break;
ps->flags = val;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set flags=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: set flags=%d\n",
session->name, ps->flags);
err = 0;
break;
@@ -1091,7 +1091,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
if (copy_to_user((void __user *) arg, &stats,
sizeof(stats)))
break;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get L2TP stats\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get L2TP stats\n",
session->name);
err = 0;
break;
@@ -1119,7 +1119,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
struct sock *sk;
struct pppol2tp_ioc_stats stats;
- l2tp_dbg(tunnel, PPPOL2TP_MSG_CONTROL,
+ l2tp_dbg(tunnel, L2TP_MSG_CONTROL,
"%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n",
tunnel->name, cmd, arg);
@@ -1155,7 +1155,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
err = -EFAULT;
break;
}
- l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: get L2TP stats\n",
+ l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: get L2TP stats\n",
tunnel->name);
err = 0;
break;
@@ -1245,7 +1245,7 @@ static int pppol2tp_tunnel_setsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_DEBUG:
tunnel->debug = val;
- l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: set debug=%x\n",
+ l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: set debug=%x\n",
tunnel->name, tunnel->debug);
break;
@@ -1272,8 +1272,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
err = -EINVAL;
break;
}
- session->recv_seq = val ? -1 : 0;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ session->recv_seq = !!val;
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: set recv_seq=%d\n",
session->name, session->recv_seq);
break;
@@ -1283,7 +1283,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
err = -EINVAL;
break;
}
- session->send_seq = val ? -1 : 0;
+ session->send_seq = !!val;
{
struct sock *ssk = ps->sock;
struct pppox_sock *po = pppox_sk(ssk);
@@ -1291,7 +1291,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
l2tp_session_set_header_len(session, session->tunnel->version);
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: set send_seq=%d\n",
session->name, session->send_seq);
break;
@@ -1301,21 +1301,21 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
err = -EINVAL;
break;
}
- session->lns_mode = val ? -1 : 0;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ session->lns_mode = !!val;
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: set lns_mode=%d\n",
session->name, session->lns_mode);
break;
case PPPOL2TP_SO_DEBUG:
session->debug = val;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set debug=%x\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: set debug=%x\n",
session->name, session->debug);
break;
case PPPOL2TP_SO_REORDERTO:
session->reorder_timeout = msecs_to_jiffies(val);
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: set reorder_timeout=%d\n",
session->name, session->reorder_timeout);
break;
@@ -1396,7 +1396,7 @@ static int pppol2tp_tunnel_getsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_DEBUG:
*val = tunnel->debug;
- l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: get debug=%x\n",
+ l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: get debug=%x\n",
tunnel->name, tunnel->debug);
break;
@@ -1419,31 +1419,31 @@ static int pppol2tp_session_getsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_RECVSEQ:
*val = session->recv_seq;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get recv_seq=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_SENDSEQ:
*val = session->send_seq;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get send_seq=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_LNSMODE:
*val = session->lns_mode;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get lns_mode=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_DEBUG:
*val = session->debug;
- l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get debug=%d\n",
+ l2tp_info(session, L2TP_MSG_CONTROL, "%s: get debug=%d\n",
session->name, *val);
break;
case PPPOL2TP_SO_REORDERTO:
*val = (int) jiffies_to_msecs(session->reorder_timeout);
- l2tp_info(session, PPPOL2TP_MSG_CONTROL,
+ l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get reorder_timeout=%d\n", session->name, *val);
break;
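
The setsockopt hunks above normalize the sequence-number and LNS-mode flags with !!val, storing strict 0/1 booleans where the old code stored -1 for "enabled" (and the l2tp_core.c send_seq assignment earlier in this series changes to 1 to match). A one-line demonstration of the normalization:

    #include <stdio.h>

    int main(void)
    {
        int val = 42;
        printf("old: %d  new: %d\n", val ? -1 : 0, !!val);
        return 0;
    }
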
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index fc60d9d738b5..b50b64ac8815 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -33,7 +33,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c
index 182470847fcf..d5d2110eb717 100644
--- a/net/lapb/lapb_in.c
+++ b/net/lapb/lapb_in.c
@@ -31,7 +31,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
index 482c94d9d958..eda726e22f64 100644
--- a/net/lapb/lapb_out.c
+++ b/net/lapb/lapb_out.c
@@ -29,7 +29,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c
index 3c1914df641f..75efde3e616c 100644
--- a/net/lapb/lapb_subr.c
+++ b/net/lapb/lapb_subr.c
@@ -28,7 +28,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 355cc3b6fa4d..1a5535bc3b8d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -29,7 +29,7 @@
#include <linux/inet.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index db916cf51ffe..5e9296382420 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -532,12 +532,12 @@ out:
static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int rc = 0;
+ add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE))
+ if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait))
break;
rc = -ERESTARTSYS;
if (signal_pending(current))
@@ -547,39 +547,39 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
break;
rc = 0;
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
return rc;
}
static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT))
+ if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait))
break;
if (signal_pending(current) || !timeout)
break;
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
return timeout;
}
static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct llc_sock *llc = llc_sk(sk);
int rc;
+ add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
rc = 0;
if (sk_wait_event(sk, &timeout,
(sk->sk_shutdown & RCV_SHUTDOWN) ||
(!llc_data_accept_state(llc->state) &&
!llc->remote_busy_flag &&
- !llc->p_flag)))
+ !llc->p_flag), &wait))
break;
rc = -ERESTARTSYS;
if (signal_pending(current))
@@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
if (!timeout)
break;
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
return rc;
}
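
The llc_ui wait loops are converted from the per-iteration prepare_to_wait()/finish_wait() pattern to a wait entry registered once with woken_wake_function, so sk_wait_event() sleeps through wait_woken() without losing a wakeup that races with the condition check. A toy model of the flag that makes this race-free (userspace stand-ins, not the kernel primitives):

    #include <stdio.h>
    #include <stdbool.h>

    /* the waker sets a per-entry flag; the sleeper only blocks when the
     * flag is still clear, so a wakeup arriving after the condition
     * check is consumed instead of lost */
    struct wait_entry { bool woken; };

    static void wake(struct wait_entry *w) { w->woken = true; }

    static bool wait_woken_model(struct wait_entry *w)
    {
        if (w->woken) {           /* wakeup already arrived: don't sleep */
            w->woken = false;
            return true;
        }
        return false;             /* real code would block here */
    }

    int main(void)
    {
        struct wait_entry w = { false };
        wake(&w);                 /* the wakeup races ahead */
        printf("slept: %s\n", wait_woken_model(&w) ? "no" : "yes");
        return 0;
    }
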
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index f9137a8341f4..282912245938 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -19,6 +19,7 @@ mac80211-y := \
aes_gcm.o \
aes_cmac.o \
aes_gmac.o \
+ fils_aead.o \
cfg.o \
ethtool.o \
rx.o \
@@ -60,4 +61,4 @@ rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o
mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y)
-ccflags-y += -D__CHECK_ENDIAN__ -DDEBUG
+ccflags-y += -DDEBUG
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index bdf0790d89cc..d0bd5fff5f0a 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -23,7 +23,7 @@
#define AAD_LEN 20
-static void gf_mulx(u8 *pad)
+void gf_mulx(u8 *pad)
{
int i, carry;
@@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad)
pad[AES_BLOCK_SIZE - 1] ^= 0x87;
}
-static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
- const u8 *addr[], const size_t *len, u8 *mac,
- size_t mac_len)
+void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
+ const u8 *addr[], const size_t *len, u8 *mac,
+ size_t mac_len)
{
u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE];
const u8 *pos, *end;
diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h
index 3702041f44fd..c827e1d5de8b 100644
--- a/net/mac80211/aes_cmac.h
+++ b/net/mac80211/aes_cmac.h
@@ -11,6 +11,10 @@
#include <linux/crypto.h>
+void gf_mulx(u8 *pad);
+void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
+ const u8 *addr[], const size_t *len, u8 *mac,
+ size_t mac_len);
struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[],
size_t key_len);
void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad,
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index f6749dced021..3b5fd4188f2a 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -315,11 +315,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
mutex_lock(&sta->ampdu_mlme.mtx);
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
- tid_agg_rx = rcu_dereference_protected(
- sta->ampdu_mlme.tid_rx[tid],
- lockdep_is_held(&sta->ampdu_mlme.mtx));
-
- if (tid_agg_rx->dialog_token == dialog_token) {
+ if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
ht_dbg_ratelimited(sta->sdata,
"updated AddBA Req from %pM on tid %u\n",
sta->sta.addr, tid);
@@ -396,7 +392,6 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
}
/* update data */
- tid_agg_rx->dialog_token = dialog_token;
tid_agg_rx->ssn = start_seq_num;
tid_agg_rx->head_seq_num = start_seq_num;
tid_agg_rx->buf_size = buf_size;
@@ -418,6 +413,7 @@ end:
if (status == WLAN_STATUS_SUCCESS) {
__set_bit(tid, sta->ampdu_mlme.agg_session_valid);
__clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
+ sta->ampdu_mlme.tid_rx_token[tid] = dialog_token;
}
mutex_unlock(&sta->ampdu_mlme.mtx);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index fd6541f3ade3..e91e503bf992 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -357,10 +357,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
mutex_lock(&local->sta_mtx);
if (mac_addr) {
- if (ieee80211_vif_is_mesh(&sdata->vif))
- sta = sta_info_get(sdata, mac_addr);
- else
- sta = sta_info_get_bss(sdata, mac_addr);
+ sta = sta_info_get_bss(sdata, mac_addr);
/*
* The ASSOC test makes sure the driver is ready to
* receive the key. When wpa_supplicant has roamed
@@ -867,6 +864,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
}
sdata->needed_rx_chains = sdata->local->rx_chains;
+ sdata->vif.bss_conf.beacon_int = params->beacon_interval;
+
mutex_lock(&local->mtx);
err = ieee80211_vif_use_channel(sdata, &params->chandef,
IEEE80211_CHANCTX_SHARED);
@@ -897,7 +896,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
vlan->vif.type);
}
- sdata->vif.bss_conf.beacon_int = params->beacon_interval;
sdata->vif.bss_conf.dtim_period = params->dtim_period;
sdata->vif.bss_conf.enable_beacon = true;
sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p;
@@ -1523,9 +1521,6 @@ static int ieee80211_change_station(struct wiphy *wiphy,
goto out_err;
if (params->vlan && params->vlan != sta->sdata->dev) {
- bool prev_4addr = false;
- bool new_4addr = false;
-
vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
if (params->vlan->ieee80211_ptr->use_4addr) {
@@ -1535,26 +1530,21 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
- new_4addr = true;
__ieee80211_check_fast_rx_iface(vlansdata);
}
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- sta->sdata->u.vlan.sta) {
+ sta->sdata->u.vlan.sta)
RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
- prev_4addr = true;
- }
+
+ if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ ieee80211_vif_dec_num_mcast(sta->sdata);
sta->sdata = vlansdata;
ieee80211_check_fast_xmit(sta);
- if (sta->sta_state == IEEE80211_STA_AUTHORIZED &&
- prev_4addr != new_4addr) {
- if (new_4addr)
- atomic_dec(&sta->sdata->bss->num_mcast_sta);
- else
- atomic_inc(&sta->sdata->bss->num_mcast_sta);
- }
+ if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ ieee80211_vif_inc_num_mcast(sta->sdata);
ieee80211_send_layer2_update(sta);
}
@@ -2480,13 +2470,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata,
smps_mode == IEEE80211_SMPS_AUTOMATIC)
return 0;
- /* If no associated stations, there's no need to do anything */
- if (!atomic_read(&sdata->u.ap.num_mcast_sta)) {
- sdata->smps_mode = smps_mode;
- ieee80211_queue_work(&sdata->local->hw, &sdata->recalc_smps);
- return 0;
- }
-
ht_dbg(sdata,
"SMPS %d requested in AP mode, sending Action frame to %d stations\n",
smps_mode, atomic_read(&sdata->u.ap.num_mcast_sta));
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e75cbf6ecc26..a0d901d8992e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -231,9 +231,6 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata)
!(sta->sdata->bss && sta->sdata->bss == sdata->bss))
continue;
- if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_ASSOC))
- continue;
-
max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta));
}
rcu_read_unlock();
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index f56e2f487d09..e02ba42ca827 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -210,6 +210,7 @@ static const char *hw_flag_names[] = {
FLAG(TX_AMSDU),
FLAG(TX_FRAG_LIST),
FLAG(REPORTS_LOW_ACK),
+ FLAG(SUPPORTS_TX_FRAG),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index bcec1240f41d..1a05f85cb1f0 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -477,6 +477,7 @@ IEEE80211_IF_FILE_RW(tdls_wider_bw);
IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC);
IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC);
IEEE80211_IF_FILE(dtim_count, u.ap.ps.dtim_count, DEC);
+IEEE80211_IF_FILE(num_mcast_sta_vlan, u.vlan.num_mcast_sta, ATOMIC);
static ssize_t ieee80211_if_fmt_num_buffered_multicast(
const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
@@ -684,6 +685,13 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
}
+static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
+{
+ /* add num_mcast_sta_vlan using name num_mcast_sta */
+ debugfs_create_file("num_mcast_sta", 0400, sdata->vif.debugfs_dir,
+ sdata, &num_mcast_sta_vlan_ops);
+}
+
static void add_ibss_files(struct ieee80211_sub_if_data *sdata)
{
DEBUGFS_ADD_MODE(tsf, 0600);
@@ -787,6 +795,9 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
case NL80211_IFTYPE_AP:
add_ap_files(sdata);
break;
+ case NL80211_IFTYPE_AP_VLAN:
+ add_vlan_files(sdata);
+ break;
case NL80211_IFTYPE_WDS:
add_wds_files(sdata);
break;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index a2fcdb47a0e6..f6003b8c2c33 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -199,13 +199,18 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
"TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+ bool tid_rx_valid;
+
tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
+ tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid);
p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i);
- p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx);
+ p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
+ tid_rx_valid);
p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
- tid_rx ? tid_rx->dialog_token : 0);
+ tid_rx_valid ?
+ sta->ampdu_mlme.tid_rx_token[i] : 0);
p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
tid_rx ? tid_rx->ssn : 0);
diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
new file mode 100644
index 000000000000..ecfdd97758a3
--- /dev/null
+++ b/net/mac80211/fils_aead.c
@@ -0,0 +1,342 @@
+/*
+ * FILS AEAD for (Re)Association Request/Response frames
+ * Copyright 2016, Qualcomm Atheros, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/skcipher.h>
+
+#include "ieee80211_i.h"
+#include "aes_cmac.h"
+#include "fils_aead.h"
+
+static int aes_s2v(struct crypto_cipher *tfm,
+ size_t num_elem, const u8 *addr[], size_t len[], u8 *v)
+{
+ u8 d[AES_BLOCK_SIZE], tmp[AES_BLOCK_SIZE];
+ size_t i;
+ const u8 *data[2];
+ size_t data_len[2], data_elems;
+
+ /* D = AES-CMAC(K, <zero>) */
+ memset(tmp, 0, AES_BLOCK_SIZE);
+ data[0] = tmp;
+ data_len[0] = AES_BLOCK_SIZE;
+ aes_cmac_vector(tfm, 1, data, data_len, d, AES_BLOCK_SIZE);
+
+ for (i = 0; i < num_elem - 1; i++) {
+ /* D = dbl(D) xor AES_CMAC(K, Si) */
+ gf_mulx(d); /* dbl */
+ aes_cmac_vector(tfm, 1, &addr[i], &len[i], tmp,
+ AES_BLOCK_SIZE);
+ crypto_xor(d, tmp, AES_BLOCK_SIZE);
+ }
+
+ if (len[i] >= AES_BLOCK_SIZE) {
+ /* len(Sn) >= 128 */
+ size_t j;
+ const u8 *pos;
+
+ /* T = Sn xorend D */
+
+ /* Use a temporary buffer to perform xorend on Sn (addr[i]) to
+ * avoid modifying the const input argument.
+ */
+ data[0] = addr[i];
+ data_len[0] = len[i] - AES_BLOCK_SIZE;
+ pos = addr[i] + data_len[0];
+ for (j = 0; j < AES_BLOCK_SIZE; j++)
+ tmp[j] = pos[j] ^ d[j];
+ data[1] = tmp;
+ data_len[1] = AES_BLOCK_SIZE;
+ data_elems = 2;
+ } else {
+ /* len(Sn) < 128 */
+ /* T = dbl(D) xor pad(Sn) */
+ gf_mulx(d); /* dbl */
+ memset(tmp, 0, AES_BLOCK_SIZE);
+ memcpy(tmp, addr[i], len[i]);
+ tmp[len[i]] = 0x80;
+ crypto_xor(d, tmp, AES_BLOCK_SIZE);
+ data[0] = d;
+ data_len[0] = sizeof(d);
+ data_elems = 1;
+ }
+ /* V = AES-CMAC(K, T) */
+ aes_cmac_vector(tfm, data_elems, data, data_len, v, AES_BLOCK_SIZE);
+
+ return 0;
+}
+
+/* Note: addr[] and len[] need to have one extra slot at the end. */
+static int aes_siv_encrypt(const u8 *key, size_t key_len,
+ const u8 *plain, size_t plain_len,
+ size_t num_elem, const u8 *addr[],
+ size_t len[], u8 *out)
+{
+ u8 v[AES_BLOCK_SIZE];
+ struct crypto_cipher *tfm;
+ struct crypto_skcipher *tfm2;
+ struct skcipher_request *req;
+ int res;
+ struct scatterlist src[1], dst[1];
+ u8 *tmp;
+
+ key_len /= 2; /* S2V key || CTR key */
+
+ addr[num_elem] = plain;
+ len[num_elem] = plain_len;
+ num_elem++;
+
+ /* S2V */
+
+ tfm = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+ /* K1 for S2V */
+ res = crypto_cipher_setkey(tfm, key, key_len);
+ if (!res)
+ res = aes_s2v(tfm, num_elem, addr, len, v);
+ crypto_free_cipher(tfm);
+ if (res)
+ return res;
+
+ /* Use a temporary copy of the plaintext, since AES-CTR may be asked
+ * to write its output over the input buffer (out can alias plain).
+ */
+ tmp = kmemdup(plain, plain_len, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ /* IV for CTR before encrypted data */
+ memcpy(out, v, AES_BLOCK_SIZE);
+
+ /* Synthetic IV to be used as the initial counter in CTR:
+ * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31)
+ */
+ v[8] &= 0x7f;
+ v[12] &= 0x7f;
+
+ /* CTR */
+
+ tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0);
+ if (IS_ERR(tfm2)) {
+ kfree(tmp);
+ return PTR_ERR(tfm2);
+ }
+ /* K2 for CTR */
+ res = crypto_skcipher_setkey(tfm2, key + key_len, key_len);
+ if (res)
+ goto fail;
+
+ req = skcipher_request_alloc(tfm2, GFP_KERNEL);
+ if (!req) {
+ res = -ENOMEM;
+ goto fail;
+ }
+
+ sg_init_one(src, tmp, plain_len);
+ sg_init_one(dst, out + AES_BLOCK_SIZE, plain_len);
+ skcipher_request_set_crypt(req, src, dst, plain_len, v);
+ res = crypto_skcipher_encrypt(req);
+ skcipher_request_free(req);
+fail:
+ kfree(tmp);
+ crypto_free_skcipher(tfm2);
+ return res;
+}
+
+/* Note: addr[] and len[] need to have one extra slot at the end. */
+static int aes_siv_decrypt(const u8 *key, size_t key_len,
+ const u8 *iv_crypt, size_t iv_c_len,
+ size_t num_elem, const u8 *addr[], size_t len[],
+ u8 *out)
+{
+ struct crypto_cipher *tfm;
+ struct crypto_skcipher *tfm2;
+ struct skcipher_request *req;
+ struct scatterlist src[1], dst[1];
+ size_t crypt_len;
+ int res;
+ u8 frame_iv[AES_BLOCK_SIZE], iv[AES_BLOCK_SIZE];
+ u8 check[AES_BLOCK_SIZE];
+
+ crypt_len = iv_c_len - AES_BLOCK_SIZE;
+ key_len /= 2; /* S2V key || CTR key */
+ addr[num_elem] = out;
+ len[num_elem] = crypt_len;
+ num_elem++;
+
+ memcpy(iv, iv_crypt, AES_BLOCK_SIZE);
+ memcpy(frame_iv, iv_crypt, AES_BLOCK_SIZE);
+
+ /* Synthetic IV to be used as the initial counter in CTR:
+ * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31)
+ */
+ iv[8] &= 0x7f;
+ iv[12] &= 0x7f;
+
+ /* CTR */
+
+ tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0);
+ if (IS_ERR(tfm2))
+ return PTR_ERR(tfm2);
+ /* K2 for CTR */
+ res = crypto_skcipher_setkey(tfm2, key + key_len, key_len);
+ if (res) {
+ crypto_free_skcipher(tfm2);
+ return res;
+ }
+
+ req = skcipher_request_alloc(tfm2, GFP_KERNEL);
+ if (!req) {
+ crypto_free_skcipher(tfm2);
+ return -ENOMEM;
+ }
+
+ sg_init_one(src, iv_crypt + AES_BLOCK_SIZE, crypt_len);
+ sg_init_one(dst, out, crypt_len);
+ skcipher_request_set_crypt(req, src, dst, crypt_len, iv);
+ res = crypto_skcipher_decrypt(req);
+ skcipher_request_free(req);
+ crypto_free_skcipher(tfm2);
+ if (res)
+ return res;
+
+ /* S2V */
+
+ tfm = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+ /* K1 for S2V */
+ res = crypto_cipher_setkey(tfm, key, key_len);
+ if (!res)
+ res = aes_s2v(tfm, num_elem, addr, len, check);
+ crypto_free_cipher(tfm);
+ if (res)
+ return res;
+ if (memcmp(check, frame_iv, AES_BLOCK_SIZE) != 0)
+ return -EINVAL;
+ return 0;
+}
+
+int fils_encrypt_assoc_req(struct sk_buff *skb,
+ struct ieee80211_mgd_assoc_data *assoc_data)
+{
+ struct ieee80211_mgmt *mgmt = (void *)skb->data;
+ u8 *capab, *ies, *encr;
+ const u8 *addr[5 + 1], *session;
+ size_t len[5 + 1];
+ size_t crypt_len;
+
+ if (ieee80211_is_reassoc_req(mgmt->frame_control)) {
+ capab = (u8 *)&mgmt->u.reassoc_req.capab_info;
+ ies = mgmt->u.reassoc_req.variable;
+ } else {
+ capab = (u8 *)&mgmt->u.assoc_req.capab_info;
+ ies = mgmt->u.assoc_req.variable;
+ }
+
+ session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION,
+ ies, skb->data + skb->len - ies);
+ if (!session || session[1] != 1 + 8)
+ return -EINVAL;
+ /* encrypt after FILS Session element */
+ encr = (u8 *)session + 2 + 1 + 8;
+
+ /* AES-SIV AAD vectors */
+
+ /* The STA's MAC address */
+ addr[0] = mgmt->sa;
+ len[0] = ETH_ALEN;
+ /* The AP's BSSID */
+ addr[1] = mgmt->da;
+ len[1] = ETH_ALEN;
+ /* The STA's nonce */
+ addr[2] = assoc_data->fils_nonces;
+ len[2] = FILS_NONCE_LEN;
+ /* The AP's nonce */
+ addr[3] = &assoc_data->fils_nonces[FILS_NONCE_LEN];
+ len[3] = FILS_NONCE_LEN;
+ /* The (Re)Association Request frame from the Capability Information
+ * field to the FILS Session element (both inclusive).
+ */
+ addr[4] = capab;
+ len[4] = encr - capab;
+
+ crypt_len = skb->data + skb->len - encr;
+ skb_put(skb, AES_BLOCK_SIZE);
+ return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len,
+ encr, crypt_len, 5, addr, len, encr);
+}
+
+int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+ u8 *frame, size_t *frame_len,
+ struct ieee80211_mgd_assoc_data *assoc_data)
+{
+ struct ieee80211_mgmt *mgmt = (void *)frame;
+ u8 *capab, *ies, *encr;
+ const u8 *addr[5 + 1], *session;
+ size_t len[5 + 1];
+ int res;
+ size_t crypt_len;
+
+ if (*frame_len < 24 + 6)
+ return -EINVAL;
+
+ capab = (u8 *)&mgmt->u.assoc_resp.capab_info;
+ ies = mgmt->u.assoc_resp.variable;
+ session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION,
+ ies, frame + *frame_len - ies);
+ if (!session || session[1] != 1 + 8) {
+ mlme_dbg(sdata,
+ "No (valid) FILS Session element in (Re)Association Response frame from %pM",
+ mgmt->sa);
+ return -EINVAL;
+ }
+ /* decrypt after FILS Session element */
+ encr = (u8 *)session + 2 + 1 + 8;
+
+ /* AES-SIV AAD vectors */
+
+ /* The AP's BSSID */
+ addr[0] = mgmt->sa;
+ len[0] = ETH_ALEN;
+ /* The STA's MAC address */
+ addr[1] = mgmt->da;
+ len[1] = ETH_ALEN;
+ /* The AP's nonce */
+ addr[2] = &assoc_data->fils_nonces[FILS_NONCE_LEN];
+ len[2] = FILS_NONCE_LEN;
+ /* The STA's nonce */
+ addr[3] = assoc_data->fils_nonces;
+ len[3] = FILS_NONCE_LEN;
+ /* The (Re)Association Response frame from the Capability Information
+ * field to the FILS Session element (both inclusive).
+ */
+ addr[4] = capab;
+ len[4] = encr - capab;
+
+ crypt_len = frame + *frame_len - encr;
+ if (crypt_len < AES_BLOCK_SIZE) {
+ mlme_dbg(sdata,
+ "Not enough room for AES-SIV data after FILS Session element in (Re)Association Response frame from %pM",
+ mgmt->sa);
+ return -EINVAL;
+ }
+ res = aes_siv_decrypt(assoc_data->fils_kek, assoc_data->fils_kek_len,
+ encr, crypt_len, 5, addr, len, encr);
+ if (res != 0) {
+ mlme_dbg(sdata,
+ "AES-SIV decryption of (Re)Association Response frame from %pM failed",
+ mgmt->sa);
+ return res;
+ }
+ *frame_len -= AES_BLOCK_SIZE;
+ return 0;
+}
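
A note on the two "&= 0x7f" masks used in both the encrypt and decrypt paths above: they implement the RFC 5297 rule Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31), clearing the most significant bit of the third and fourth 32-bit words of the synthetic IV so that CTR-mode counter increments cannot carry across those boundaries. A minimal userspace sketch of just that step (the helper name is hypothetical):

#include <stdint.h>
#include <string.h>

static void siv_ctr_iv(const uint8_t v[16], uint8_t q[16])
{
	memcpy(q, v, 16);
	q[8]  &= 0x7f;	/* clear MSB of the 3rd 32-bit word */
	q[12] &= 0x7f;	/* clear MSB of the 4th 32-bit word */
}
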
diff --git a/net/mac80211/fils_aead.h b/net/mac80211/fils_aead.h
new file mode 100644
index 000000000000..fbc65232f0b3
--- /dev/null
+++ b/net/mac80211/fils_aead.h
@@ -0,0 +1,19 @@
+/*
+ * FILS AEAD for (Re)Association Request/Response frames
+ * Copyright 2016, Qualcomm Atheros, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef FILS_AEAD_H
+#define FILS_AEAD_H
+
+int fils_encrypt_assoc_req(struct sk_buff *skb,
+ struct ieee80211_mgd_assoc_data *assoc_data);
+int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+ u8 *frame, size_t *frame_len,
+ struct ieee80211_mgd_assoc_data *assoc_data);
+
+#endif /* FILS_AEAD_H */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 34c2add2c455..b2069fbd60f9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -84,6 +84,8 @@ struct ieee80211_local;
#define IEEE80211_DEFAULT_MAX_SP_LEN \
IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
+extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
+
#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */)
#define IEEE80211_MAX_NAN_INSTANCE_ID 255
@@ -157,7 +159,7 @@ enum ieee80211_bss_valid_data_flags {
IEEE80211_BSS_VALID_ERP = BIT(3)
};
-typedef unsigned __bitwise__ ieee80211_tx_result;
+typedef unsigned __bitwise ieee80211_tx_result;
#define TX_CONTINUE ((__force ieee80211_tx_result) 0u)
#define TX_DROP ((__force ieee80211_tx_result) 1u)
#define TX_QUEUED ((__force ieee80211_tx_result) 2u)
@@ -178,7 +180,7 @@ struct ieee80211_tx_data {
};
-typedef unsigned __bitwise__ ieee80211_rx_result;
+typedef unsigned __bitwise ieee80211_rx_result;
#define RX_CONTINUE ((__force ieee80211_rx_result) 0u)
#define RX_DROP_UNUSABLE ((__force ieee80211_rx_result) 1u)
#define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u)
@@ -307,6 +309,7 @@ struct ieee80211_if_vlan {
/* used for all tx if the VLAN is configured to 4-addr mode */
struct sta_info __rcu *sta;
+ atomic_t num_mcast_sta; /* number of stations receiving multicast */
};
struct mesh_stats {
@@ -398,6 +401,10 @@ struct ieee80211_mgd_assoc_data {
struct ieee80211_vht_cap ap_vht_cap;
+ u8 fils_nonces[2 * FILS_NONCE_LEN];
+ u8 fils_kek[FILS_MAX_KEK_LEN];
+ size_t fils_kek_len;
+
size_t ie_len;
u8 ie[];
};
@@ -442,7 +449,7 @@ struct ieee80211_if_managed {
struct ieee80211_mgd_auth_data *auth_data;
struct ieee80211_mgd_assoc_data *assoc_data;
- u8 bssid[ETH_ALEN];
+ u8 bssid[ETH_ALEN] __aligned(2);
u16 aid;
@@ -1527,6 +1534,23 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
return false;
}
+void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata);
+void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata);
+
+/* This function returns the number of multicast stations connected to this
+ * interface. It returns -1 if the number is not tracked, i.e. for netdevs
+ * not in AP or AP_VLAN mode, or for AP_VLAN interfaces in 4-address mode.
+ */
+static inline int
+ieee80211_vif_get_num_mcast_if(struct ieee80211_sub_if_data *sdata)
+{
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ return atomic_read(&sdata->u.ap.num_mcast_sta);
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta)
+ return atomic_read(&sdata->u.vlan.num_mcast_sta);
+ return -1;
+}
+
u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
struct ieee80211_rx_status *status,
unsigned int mpdu_len,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 638ec0759078..d37ae7dc114b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -6,6 +6,7 @@
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (c) 2016 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -150,15 +151,6 @@ void ieee80211_recalc_idle(struct ieee80211_local *local)
ieee80211_hw_config(local, change);
}
-static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN)
- return -EINVAL;
-
- dev->mtu = new_mtu;
- return 0;
-}
-
static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
bool check_dup)
{
@@ -1166,7 +1158,6 @@ static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_uninit = ieee80211_uninit,
.ndo_start_xmit = ieee80211_subif_start_xmit,
.ndo_set_rx_mode = ieee80211_set_multicast_list,
- .ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_netdev_select_queue,
.ndo_get_stats64 = ieee80211_get_stats64,
@@ -1200,7 +1191,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
.ndo_uninit = ieee80211_uninit,
.ndo_start_xmit = ieee80211_monitor_start_xmit,
.ndo_set_rx_mode = ieee80211_set_multicast_list,
- .ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_monitor_select_queue,
.ndo_get_stats64 = ieee80211_get_stats64,
@@ -1306,6 +1296,26 @@ static void ieee80211_iface_work(struct work_struct *work)
} else if (ieee80211_is_action(mgmt->frame_control) &&
mgmt->u.action.category == WLAN_CATEGORY_VHT) {
switch (mgmt->u.action.u.vht_group_notif.action_code) {
+ case WLAN_VHT_ACTION_OPMODE_NOTIF: {
+ struct ieee80211_rx_status *status;
+ enum nl80211_band band;
+ u8 opmode;
+
+ status = IEEE80211_SKB_RXCB(skb);
+ band = status->band;
+ opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+
+ if (sta)
+ ieee80211_vht_handle_opmode(sdata, sta,
+ opmode,
+ band);
+
+ mutex_unlock(&local->sta_mtx);
+ break;
+ }
case WLAN_VHT_ACTION_GROUPID_MGMT:
ieee80211_process_mu_groups(sdata, mgmt);
break;
@@ -1884,6 +1894,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
+ /* MTU range: 256 - 2304 */
+ ndev->min_mtu = 256;
+ ndev->max_mtu = IEEE80211_MAX_DATA_LEN;
+
ret = register_netdevice(ndev);
if (ret) {
ieee80211_if_free(ndev);
@@ -2005,3 +2019,19 @@ void ieee80211_iface_exit(void)
{
unregister_netdevice_notifier(&mac80211_netdev_notifier);
}
+
+void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata)
+{
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ atomic_inc(&sdata->u.ap.num_mcast_sta);
+ else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ atomic_inc(&sdata->u.vlan.num_mcast_sta);
+}
+
+void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata)
+{
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ atomic_dec(&sdata->u.ap.num_mcast_sta);
+ else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ atomic_dec(&sdata->u.vlan.num_mcast_sta);
+}
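
On the MTU handling above: the per-driver range check in ndo_change_mtu is dropped in favor of the min_mtu/max_mtu fields, so validation happens once in the core. A simplified sketch of the check the core applies (the real logic lives in dev_set_mtu(); this helper is illustrative only):

#include <linux/errno.h>
#include <linux/netdevice.h>

static int mtu_in_range(const struct net_device *dev, int new_mtu)
{
	if (new_mtu < dev->min_mtu || new_mtu > dev->max_mtu)
		return -EINVAL;
	return 0;
}
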
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index edd6f2945f69..a98fc2b5e0dc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -265,7 +265,8 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
if (uni) {
rcu_assign_pointer(sdata->default_unicast_key, key);
ieee80211_check_fast_xmit_iface(sdata);
- drv_set_default_unicast_key(sdata->local, sdata, idx);
+ if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
+ drv_set_default_unicast_key(sdata->local, sdata, idx);
}
if (multi)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1075ac24c8c5..56fb47953b72 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -549,6 +549,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
NL80211_FEATURE_MAC_ON_CREATE |
NL80211_FEATURE_USERSPACE_MPM |
NL80211_FEATURE_FULL_AP_CLIENT_STATE;
+ wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA);
if (!ops->hw_scan)
wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -821,6 +822,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
!local->ops->tdls_recv_channel_switch))
return -EOPNOTSUPP;
+ if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_TX_FRAG) &&
+ !local->ops->set_frag_threshold))
+ return -EINVAL;
+
if (WARN_ON(local->hw.wiphy->interface_modes &
BIT(NL80211_IFTYPE_NAN) &&
(!local->ops->start_nan || !local->ops->stop_nan)))
@@ -908,12 +913,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_ht = supp_ht || sband->ht_cap.ht_supported;
supp_vht = supp_vht || sband->vht_cap.vht_supported;
- if (sband->ht_cap.ht_supported)
- local->rx_chains =
- max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs),
- local->rx_chains);
+ if (!sband->ht_cap.ht_supported)
+ continue;
/* TODO: consider VHT for RX chains, hopefully it's the same */
+ local->rx_chains =
+ max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs),
+ local->rx_chains);
+
+ /* no need to mask, SM_PS_DISABLED has all bits set */
+ sband->ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
+ IEEE80211_HT_CAP_SM_PS_SHIFT;
}
/* if low-level driver supports AP, we also support VLAN */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7486f2dab4ba..098ce9b179ee 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -30,6 +30,7 @@
#include "driver-ops.h"
#include "rate.h"
#include "led.h"
+#include "fils_aead.h"
#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2)
@@ -652,6 +653,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
2 + sizeof(struct ieee80211_ht_cap) + /* HT */
2 + sizeof(struct ieee80211_vht_cap) + /* VHT */
assoc_data->ie_len + /* extra IEs */
+ (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
9, /* WMM */
GFP_KERNEL);
if (!skb)
@@ -875,6 +877,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
memcpy(pos, assoc_data->ie + offset, noffset - offset);
}
+ if (assoc_data->fils_kek_len &&
+ fils_encrypt_assoc_req(skb, assoc_data) < 0) {
+ dev_kfree_skb(skb);
+ return;
+ }
+
drv_mgd_prepare_tx(local, sdata);
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
@@ -2510,7 +2518,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
}
static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
- bool assoc)
+ bool assoc, bool abandon)
{
struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
@@ -2533,6 +2541,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
mutex_lock(&sdata->local->mtx);
ieee80211_vif_release_channel(sdata);
mutex_unlock(&sdata->local->mtx);
+
+ if (abandon)
+ cfg80211_abandon_assoc(sdata->dev, assoc_data->bss);
}
kfree(assoc_data);
@@ -2618,6 +2629,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
case WLAN_AUTH_LEAP:
case WLAN_AUTH_FT:
case WLAN_AUTH_SAE:
+ case WLAN_AUTH_FILS_SK:
+ case WLAN_AUTH_FILS_SK_PFS:
+ case WLAN_AUTH_FILS_PK:
break;
case WLAN_AUTH_SHARED_KEY:
if (ifmgd->auth_data->expected_transaction != 4) {
@@ -2762,7 +2776,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
bssid, reason_code,
ieee80211_get_reason_code_string(reason_code));
- ieee80211_destroy_assoc_data(sdata, false);
+ ieee80211_destroy_assoc_data(sdata, false, true);
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
return;
@@ -3143,6 +3157,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
reassoc ? "Rea" : "A", mgmt->sa,
capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
+ if (assoc_data->fils_kek_len &&
+ fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0)
+ return;
+
pos = mgmt->u.assoc_resp.variable;
ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
@@ -3167,14 +3185,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
if (status_code != WLAN_STATUS_SUCCESS) {
sdata_info(sdata, "%pM denied association (code=%d)\n",
mgmt->sa, status_code);
- ieee80211_destroy_assoc_data(sdata, false);
+ ieee80211_destroy_assoc_data(sdata, false, false);
event.u.mlme.status = MLME_DENIED;
event.u.mlme.reason = status_code;
drv_event_callback(sdata->local, sdata, &event);
} else {
if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) {
/* oops -- internal error -- send timeout for now */
- ieee80211_destroy_assoc_data(sdata, false);
+ ieee80211_destroy_assoc_data(sdata, false, false);
cfg80211_assoc_timeout(sdata->dev, bss);
return;
}
@@ -3187,13 +3205,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
* recalc after assoc_data is NULL but before associated
* is set can cause the interface to go idle
*/
- ieee80211_destroy_assoc_data(sdata, true);
+ ieee80211_destroy_assoc_data(sdata, true, false);
/* get uapsd queues configuration */
uapsd_queues = 0;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
if (sdata->tx_conf[ac].uapsd)
- uapsd_queues |= BIT(ac);
+ uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
}
cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues);
@@ -3886,7 +3904,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
.u.mlme.status = MLME_TIMEOUT,
};
- ieee80211_destroy_assoc_data(sdata, false);
+ ieee80211_destroy_assoc_data(sdata, false, false);
cfg80211_assoc_timeout(sdata->dev, bss);
drv_event_callback(sdata->local, sdata, &event);
}
@@ -4025,7 +4043,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
WLAN_REASON_DEAUTH_LEAVING,
false, frame_buf);
if (ifmgd->assoc_data)
- ieee80211_destroy_assoc_data(sdata, false);
+ ieee80211_destroy_assoc_data(sdata, false, true);
if (ifmgd->auth_data)
ieee80211_destroy_auth_data(sdata, false);
cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
@@ -4479,24 +4497,36 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
case NL80211_AUTHTYPE_SAE:
auth_alg = WLAN_AUTH_SAE;
break;
+ case NL80211_AUTHTYPE_FILS_SK:
+ auth_alg = WLAN_AUTH_FILS_SK;
+ break;
+ case NL80211_AUTHTYPE_FILS_SK_PFS:
+ auth_alg = WLAN_AUTH_FILS_SK_PFS;
+ break;
+ case NL80211_AUTHTYPE_FILS_PK:
+ auth_alg = WLAN_AUTH_FILS_PK;
+ break;
default:
return -EOPNOTSUPP;
}
- auth_data = kzalloc(sizeof(*auth_data) + req->sae_data_len +
+ auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
req->ie_len, GFP_KERNEL);
if (!auth_data)
return -ENOMEM;
auth_data->bss = req->bss;
- if (req->sae_data_len >= 4) {
- __le16 *pos = (__le16 *) req->sae_data;
- auth_data->sae_trans = le16_to_cpu(pos[0]);
- auth_data->sae_status = le16_to_cpu(pos[1]);
- memcpy(auth_data->data, req->sae_data + 4,
- req->sae_data_len - 4);
- auth_data->data_len += req->sae_data_len - 4;
+ if (req->auth_data_len >= 4) {
+ if (req->auth_type == NL80211_AUTHTYPE_SAE) {
+ __le16 *pos = (__le16 *) req->auth_data;
+
+ auth_data->sae_trans = le16_to_cpu(pos[0]);
+ auth_data->sae_status = le16_to_cpu(pos[1]);
+ }
+ memcpy(auth_data->data, req->auth_data + 4,
+ req->auth_data_len - 4);
+ auth_data->data_len += req->auth_data_len - 4;
}
if (req->ie && req->ie_len) {
@@ -4692,6 +4722,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
assoc_data->ie_len = req->ie_len;
}
+ if (req->fils_kek) {
+ /* should already be checked in cfg80211 - so warn */
+ if (WARN_ON(req->fils_kek_len > FILS_MAX_KEK_LEN)) {
+ err = -EINVAL;
+ goto err_free;
+ }
+ memcpy(assoc_data->fils_kek, req->fils_kek,
+ req->fils_kek_len);
+ assoc_data->fils_kek_len = req->fils_kek_len;
+ }
+
+ if (req->fils_nonces)
+ memcpy(assoc_data->fils_nonces, req->fils_nonces,
+ 2 * FILS_NONCE_LEN);
+
assoc_data->bss = req->bss;
if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
@@ -4907,7 +4952,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
frame_buf);
- ieee80211_destroy_assoc_data(sdata, false);
+ ieee80211_destroy_assoc_data(sdata, false, true);
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code);
@@ -4982,7 +5027,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
sdata_lock(sdata);
if (ifmgd->assoc_data) {
struct cfg80211_bss *bss = ifmgd->assoc_data->bss;
- ieee80211_destroy_assoc_data(sdata, false);
+ ieee80211_destroy_assoc_data(sdata, false, false);
cfg80211_assoc_timeout(sdata->dev, bss);
}
if (ifmgd->auth_data)
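
One detail of the auth-data rework in the hunks above: the first four bytes of req->auth_data carry the SAE transaction and status fields, which are parsed only for SAE and stripped before the remainder is copied. Both fields are little-endian on the wire, as in this userspace-style sketch (names hypothetical):

#include <stdint.h>

struct sae_auth_hdr {
	uint16_t transaction;
	uint16_t status;
};

static void sae_parse_hdr(const uint8_t *data, struct sae_auth_hdr *out)
{
	out->transaction = data[0] | ((uint16_t)data[1] << 8);
	out->status      = data[2] | ((uint16_t)data[3] << 8);
}
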
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a47bbc973f2d..3090dd4342f6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1394,13 +1394,15 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid)
u8 ac = ieee802_1d_to_ac[tid & 7];
/*
- * If this AC is not trigger-enabled do nothing.
+ * If this AC is not trigger-enabled do nothing unless the
+ * driver is calling us after it already checked.
*
* NB: This could/should check a separate bitmap of trigger-
* enabled queues, but for now we only implement uAPSD w/o
* TSPEC changes to the ACs, so they're always the same.
*/
- if (!(sta->sta.uapsd_queues & BIT(ac)))
+ if (!(sta->sta.uapsd_queues & ieee80211_ac_to_qos_mask[ac]) &&
+ tid != IEEE80211_NUM_TIDS)
return;
/* if we are in a service period, do nothing */
@@ -2215,7 +2217,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
- if (is_multicast_ether_addr(ehdr->h_dest)) {
+ if (is_multicast_ether_addr(ehdr->h_dest) &&
+ ieee80211_vif_get_num_mcast_if(sdata) != 0) {
/*
* send multicast frames both to higher layers in
* local net stack and back to the wireless medium
@@ -2224,7 +2227,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if (!xmit_skb)
net_info_ratelimited("%s: failed to clone multicast frame\n",
dev->name);
- } else {
+ } else if (!is_multicast_ether_addr(ehdr->h_dest)) {
dsta = sta_info_get(sdata, skb->data);
if (dsta) {
/*
@@ -2469,7 +2472,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
if (!ifmsh->mshcfg.dot11MeshForwarding)
goto out;
- fwd_skb = skb_copy(skb, GFP_ATOMIC);
+ fwd_skb = skb_copy_expand(skb, local->tx_headroom +
+ sdata->encrypt_headroom, 0, GFP_ATOMIC);
if (!fwd_skb) {
net_info_ratelimited("%s: failed to clone mesh frame\n",
sdata->name);
@@ -2877,17 +2881,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
switch (mgmt->u.action.u.vht_opmode_notif.action_code) {
case WLAN_VHT_ACTION_OPMODE_NOTIF: {
- u8 opmode;
-
/* verify opmode is present */
if (len < IEEE80211_MIN_ACTION_SIZE + 2)
goto invalid;
-
- opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
-
- ieee80211_vht_handle_opmode(rx->sdata, rx->sta,
- opmode, status->band);
- goto handled;
+ goto queue;
}
case WLAN_VHT_ACTION_GROUPID_MGMT: {
if (len < IEEE80211_MIN_ACTION_SIZE + 25)
@@ -3939,21 +3936,31 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u64_stats_update_end(&stats->syncp);
if (fast_rx->internal_forward) {
- struct sta_info *dsta = sta_info_get(rx->sdata, skb->data);
+ struct sk_buff *xmit_skb = NULL;
+ bool multicast = is_multicast_ether_addr(skb->data);
- if (dsta) {
+ if (multicast) {
+ xmit_skb = skb_copy(skb, GFP_ATOMIC);
+ } else if (sta_info_get(rx->sdata, skb->data)) {
+ xmit_skb = skb;
+ skb = NULL;
+ }
+
+ if (xmit_skb) {
/*
* Send to wireless media and increase priority by 256
* to keep the received priority instead of
* reclassifying the frame (see cfg80211_classify8021d).
*/
- skb->priority += 256;
- skb->protocol = htons(ETH_P_802_3);
- skb_reset_network_header(skb);
- skb_reset_mac_header(skb);
- dev_queue_xmit(skb);
- return true;
+ xmit_skb->priority += 256;
+ xmit_skb->protocol = htons(ETH_P_802_3);
+ skb_reset_network_header(xmit_skb);
+ skb_reset_mac_header(xmit_skb);
+ dev_queue_xmit(xmit_skb);
}
+
+ if (!skb)
+ return true;
}
/* deliver to local stack */
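
The fast-rx change above brings the bridging behavior in line with the slow path in ieee80211_deliver_skb(): multicast frames are duplicated (one copy bridged back to the medium, one delivered locally) unless the interface tracks zero multicast stations, while unicast frames to a station on the same BSS are forwarded without a copy. A condensed restatement of those slow-path rules (enum and names are illustrative, not mac80211 API):

#include <stdbool.h>

enum bridge_action {
	DELIVER_ONLY,		/* local stack only */
	FORWARD_ONLY,		/* back to the wireless medium only */
	COPY_AND_DELIVER,	/* both */
};

static enum bridge_action ap_bridge(bool is_mcast, bool dst_is_local_sta,
				    int num_mcast_sta)
{
	if (is_mcast)
		return num_mcast_sta != 0 ? COPY_AND_DELIVER : DELIVER_ONLY;
	return dst_is_local_sta ? FORWARD_ONLY : DELIVER_ONLY;
}
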
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 8e05032689f0..50c309094c37 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -709,7 +709,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
unsigned long tids;
- if (ignore_for_tim & BIT(ac))
+ if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac])
continue;
indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) ||
@@ -1389,7 +1389,7 @@ ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs,
return true;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- if (ignored_acs & BIT(ac))
+ if (ignored_acs & ieee80211_ac_to_qos_mask[ac])
continue;
if (!skb_queue_empty(&sta->tx_filtered[ac]) ||
@@ -1414,7 +1414,7 @@ ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs,
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
unsigned long tids;
- if (ignored_acs & BIT(ac))
+ if (ignored_acs & ieee80211_ac_to_qos_mask[ac])
continue;
tids = ieee80211_tids_for_ac(ac);
@@ -1482,7 +1482,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
BIT(find_highest_prio_tid(driver_release_tids));
if (skb_queue_empty(&frames) && !driver_release_tids) {
- int tid;
+ int tid, ac;
/*
* For PS-Poll, this can only happen due to a race condition
@@ -1500,7 +1500,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
*/
/* This will evaluate to 1, 3, 5 or 7. */
- tid = 7 - ((ffs(~ignored_acs) - 1) << 1);
+ for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++)
+ if (!(ignored_acs & ieee80211_ac_to_qos_mask[ac]))
+ break;
+ tid = 7 - 2 * ac;
ieee80211_send_null_response(sta, tid, reason, true, false);
} else if (!driver_release_tids) {
@@ -1871,10 +1874,7 @@ int sta_info_move_state(struct sta_info *sta,
if (!sta->sta.support_p2p_ps)
ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
} else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
- if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
- (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- !sta->sdata->u.vlan.sta))
- atomic_dec(&sta->sdata->bss->num_mcast_sta);
+ ieee80211_vif_dec_num_mcast(sta->sdata);
clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
ieee80211_clear_fast_xmit(sta);
ieee80211_clear_fast_rx(sta);
@@ -1882,10 +1882,7 @@ int sta_info_move_state(struct sta_info *sta,
break;
case IEEE80211_STA_AUTHORIZED:
if (sta->sta_state == IEEE80211_STA_ASSOC) {
- if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
- (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- !sta->sdata->u.vlan.sta))
- atomic_inc(&sta->sdata->bss->num_mcast_sta);
+ ieee80211_vif_inc_num_mcast(sta->sdata);
set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
ieee80211_check_fast_xmit(sta);
ieee80211_check_fast_rx(sta);
@@ -1975,6 +1972,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
u16 brate;
unsigned int shift;
+ rinfo->flags = 0;
sband = local->hw.wiphy->bands[(rate >> 4) & 0xf];
brate = sband->bitrates[rate & 0xf].bitrate;
if (rinfo->bw == RATE_INFO_BW_5)
@@ -1990,14 +1988,15 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
}
-static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
+static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
{
u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
if (rate == STA_STATS_RATE_INVALID)
- rinfo->flags = 0;
- else
- sta_stats_decode_rate(sta->local, rate, rinfo);
+ return -EINVAL;
+
+ sta_stats_decode_rate(sta->local, rate, rinfo);
+ return 0;
}
static void sta_set_tidstats(struct sta_info *sta,
@@ -2202,8 +2201,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
}
if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) {
- sta_set_rate_info_rx(sta, &sinfo->rxrate);
- sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
+ if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0)
+ sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
}
sinfo->filled |= BIT(NL80211_STA_INFO_TID_STATS);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index ed5fcb984a01..dd06ef0b8861 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -184,7 +184,6 @@ struct tid_ampdu_tx {
* @ssn: Starting Sequence Number expected to be aggregated.
* @buf_size: buffer size for incoming A-MPDUs
* @timeout: reset timer value (in TUs).
- * @dialog_token: dialog token for aggregation session
* @rcu_head: RCU head used for freeing this struct
* @reorder_lock: serializes access to reorder buffer, see below.
* @auto_seq: used for offloaded BA sessions to automatically pick head_seq_num and
@@ -213,7 +212,6 @@ struct tid_ampdu_rx {
u16 ssn;
u16 buf_size;
u16 timeout;
- u8 dialog_token;
bool auto_seq;
bool removed;
};
@@ -225,6 +223,7 @@ struct tid_ampdu_rx {
* to tid_tx[idx], which are protected by the sta spinlock)
* tid_start_tx is also protected by sta->lock.
* @tid_rx: aggregation info for Rx per TID -- RCU protected
+ * @tid_rx_token: dialog tokens for valid aggregation sessions
* @tid_rx_timer_expired: bitmap indicating on which TIDs the
* RX timer expired until the work for it runs
* @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the
@@ -243,6 +242,7 @@ struct sta_ampdu_mlme {
struct mutex mtx;
/* rx */
struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS];
+ u8 tid_rx_token[IEEE80211_NUM_TIDS];
unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
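
With the dialog token moved out of struct tid_ampdu_rx into the per-TID array above, a valid session's token can be reported even when tid_rx itself is NULL (as with driver-offloaded sessions). The lookup debugfs_sta.c now performs, as a small sketch (helper name hypothetical):

static u8 sta_rx_agg_token(struct sta_info *sta, int tid)
{
	if (!test_bit(tid, sta->ampdu_mlme.agg_session_valid))
		return 0;
	return sta->ampdu_mlme.tid_rx_token[tid];
}
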
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index bd5f4be89435..797e847cbc49 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -331,9 +331,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc);
return TX_DROP;
}
- } else if (unlikely(tx->sdata->vif.type == NL80211_IFTYPE_AP &&
- ieee80211_is_data(hdr->frame_control) &&
- !atomic_read(&tx->sdata->u.ap.num_mcast_sta))) {
+ } else if (unlikely(ieee80211_is_data(hdr->frame_control) &&
+ ieee80211_vif_get_num_mcast_if(tx->sdata) == 0)) {
/*
* No associated STAs - no need to send multicast
* frames.
@@ -935,7 +934,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
if (info->flags & IEEE80211_TX_CTL_DONTFRAG)
return TX_CONTINUE;
- if (tx->local->ops->set_frag_threshold)
+ if (ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG))
return TX_CONTINUE;
/*
@@ -1244,7 +1243,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
struct ieee80211_vif *vif,
- struct ieee80211_sta *pubsta,
+ struct sta_info *sta,
struct sk_buff *skb)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -1258,10 +1257,13 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
if (!ieee80211_is_data(hdr->frame_control))
return NULL;
- if (pubsta) {
+ if (sta) {
u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
- txq = pubsta->txq[tid];
+ if (!sta->uploaded)
+ return NULL;
+
+ txq = sta->sta.txq[tid];
} else if (vif) {
txq = vif->txq;
}
@@ -1504,23 +1506,17 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
struct fq *fq = &local->fq;
struct ieee80211_vif *vif;
struct txq_info *txqi;
- struct ieee80211_sta *pubsta;
if (!local->ops->wake_tx_queue ||
sdata->vif.type == NL80211_IFTYPE_MONITOR)
return false;
- if (sta && sta->uploaded)
- pubsta = &sta->sta;
- else
- pubsta = NULL;
-
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
struct ieee80211_sub_if_data, u.ap);
vif = &sdata->vif;
- txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+ txqi = ieee80211_get_txq(local, vif, sta, skb);
if (!txqi)
return false;
@@ -2798,7 +2794,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
/* fast-xmit doesn't handle fragmentation at all */
if (local->hw.wiphy->frag_threshold != (u32)-1 &&
- !local->ops->set_frag_threshold)
+ !ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG))
goto out;
rcu_read_lock();
@@ -3057,11 +3053,12 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr;
- struct ethhdr amsdu_hdr;
+ struct ethhdr *amsdu_hdr;
int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header);
int subframe_len = skb->len - hdr_len;
void *data;
- u8 *qc;
+ u8 *qc, *h_80211_src, *h_80211_dst;
+ const u8 *bssid;
if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
return false;
@@ -3069,19 +3066,44 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
return true;
- if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr),
+ if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr),
&subframe_len))
return false;
- amsdu_hdr.h_proto = cpu_to_be16(subframe_len);
- memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN);
- memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN);
+ data = skb_push(skb, sizeof(*amsdu_hdr));
+ memmove(data, data + sizeof(*amsdu_hdr), hdr_len);
+ hdr = data;
+ amsdu_hdr = data + hdr_len;
+ /* h_80211_src/dst is addr* field within hdr */
+ h_80211_src = data + fast_tx->sa_offs;
+ h_80211_dst = data + fast_tx->da_offs;
+
+ amsdu_hdr->h_proto = cpu_to_be16(subframe_len);
+ ether_addr_copy(amsdu_hdr->h_source, h_80211_src);
+ ether_addr_copy(amsdu_hdr->h_dest, h_80211_dst);
+
+ /* according to IEEE 802.11-2012 8.3.2 table 8-19, the outer SA/DA
+ * fields needs to be changed to BSSID for A-MSDU frames depending
+ * on FromDS/ToDS values.
+ */
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_STATION:
+ bssid = sdata->u.mgd.bssid;
+ break;
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
+ bssid = sdata->vif.addr;
+ break;
+ default:
+ bssid = NULL;
+ }
- data = skb_push(skb, sizeof(amsdu_hdr));
- memmove(data, data + sizeof(amsdu_hdr), hdr_len);
- memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr));
+ if (bssid && ieee80211_has_fromds(hdr->frame_control))
+ ether_addr_copy(h_80211_src, bssid);
+
+ if (bssid && ieee80211_has_tods(hdr->frame_control))
+ ether_addr_copy(h_80211_dst, bssid);
- hdr = data;
qc = ieee80211_get_qos_ctl(hdr);
*qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT;
@@ -3262,7 +3284,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2);
int hw_headroom = sdata->local->hw.extra_tx_headroom;
struct ethhdr eth;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_tx_info *info;
struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
struct ieee80211_tx_data tx;
ieee80211_tx_result r;
@@ -3326,6 +3348,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN);
memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN);
+ info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
info->band = fast_tx->band;
info->control.vif = &sdata->vif;
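
The A-MSDU rework above applies IEEE 802.11-2012 Table 8-19: whichever outer address crosses the distribution system is replaced by the BSSID. Restated as a standalone helper (a sketch, not this patch's code; ether_addr_copy() is from <linux/etherdevice.h>):

#include <linux/etherdevice.h>

static void amsdu_outer_addrs(bool from_ds, bool to_ds,
			      const u8 *sa, const u8 *da, const u8 *bssid,
			      u8 *out_sa, u8 *out_da)
{
	ether_addr_copy(out_sa, from_ds ? bssid : sa);
	ether_addr_copy(out_da, to_ds ? bssid : da);
}
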
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 545c79a42a77..ac59fbd280df 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3308,10 +3308,11 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *sdata_iter;
enum nl80211_iftype iftype = sdata->wdev.iftype;
- int num[NUM_NL80211_IFTYPES];
struct ieee80211_chanctx *ctx;
- int num_different_channels = 0;
int total = 1;
+ struct iface_combination_params params = {
+ .radar_detect = radar_detect,
+ };
lockdep_assert_held(&local->chanctx_mtx);
@@ -3322,12 +3323,19 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
!chandef->chan))
return -EINVAL;
- if (chandef)
- num_different_channels = 1;
-
if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
return -EINVAL;
+ if (sdata->vif.type == NL80211_IFTYPE_AP ||
+ sdata->vif.type == NL80211_IFTYPE_MESH_POINT) {
+ /*
+ * always passing this is harmless, since it'll be the
+ * same value that cfg80211 finds if it finds the same
+ * interface ... and that's always allowed
+ */
+ params.new_beacon_int = sdata->vif.bss_conf.beacon_int;
+ }
+
/* Always allow software iftypes */
if (local->hw.wiphy->software_iftypes & BIT(iftype)) {
if (radar_detect)
@@ -3335,24 +3343,26 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
return 0;
}
- memset(num, 0, sizeof(num));
+ if (chandef)
+ params.num_different_channels = 1;
if (iftype != NL80211_IFTYPE_UNSPECIFIED)
- num[iftype] = 1;
+ params.iftype_num[iftype] = 1;
list_for_each_entry(ctx, &local->chanctx_list, list) {
if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
continue;
- radar_detect |= ieee80211_chanctx_radar_detect(local, ctx);
+ params.radar_detect |=
+ ieee80211_chanctx_radar_detect(local, ctx);
if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) {
- num_different_channels++;
+ params.num_different_channels++;
continue;
}
if (chandef && chanmode == IEEE80211_CHANCTX_SHARED &&
cfg80211_chandef_compatible(chandef,
&ctx->conf.def))
continue;
- num_different_channels++;
+ params.num_different_channels++;
}
list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) {
@@ -3365,16 +3375,14 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
continue;
- num[wdev_iter->iftype]++;
+ params.iftype_num[wdev_iter->iftype]++;
total++;
}
- if (total == 1 && !radar_detect)
+ if (total == 1 && !params.radar_detect)
return 0;
- return cfg80211_check_combinations(local->hw.wiphy,
- num_different_channels,
- radar_detect, num);
+ return cfg80211_check_combinations(local->hw.wiphy, &params);
}
static void
@@ -3390,12 +3398,10 @@ ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c,
int ieee80211_max_num_channels(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- int num[NUM_NL80211_IFTYPES] = {};
struct ieee80211_chanctx *ctx;
- int num_different_channels = 0;
- u8 radar_detect = 0;
u32 max_num_different_channels = 1;
int err;
+ struct iface_combination_params params = {0};
lockdep_assert_held(&local->chanctx_mtx);
@@ -3403,17 +3409,17 @@ int ieee80211_max_num_channels(struct ieee80211_local *local)
if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
continue;
- num_different_channels++;
+ params.num_different_channels++;
- radar_detect |= ieee80211_chanctx_radar_detect(local, ctx);
+ params.radar_detect |=
+ ieee80211_chanctx_radar_detect(local, ctx);
}
list_for_each_entry_rcu(sdata, &local->interfaces, list)
- num[sdata->wdev.iftype]++;
+ params.iftype_num[sdata->wdev.iftype]++;
- err = cfg80211_iter_combinations(local->hw.wiphy,
- num_different_channels, radar_detect,
- num, ieee80211_iter_max_chans,
+ err = cfg80211_iter_combinations(local->hw.wiphy, &params,
+ ieee80211_iter_max_chans,
&max_num_different_channels);
if (err < 0)
return err;
@@ -3456,3 +3462,10 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
*byte_cnt = txqi->tin.backlog_bytes + frag_bytes;
}
EXPORT_SYMBOL(ieee80211_txq_get_depth);
+
+const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS] = {
+ IEEE80211_WMM_IE_STA_QOSINFO_AC_VO,
+ IEEE80211_WMM_IE_STA_QOSINFO_AC_VI,
+ IEEE80211_WMM_IE_STA_QOSINFO_AC_BE,
+ IEEE80211_WMM_IE_STA_QOSINFO_AC_BK
+};
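
The new array maps mac80211's AC indices (VO=0, VI=1, BE=2, BK=3) to the matching WMM QoS-info bit, replacing the bare BIT(ac) uses converted above. As a worked example, the null-response TID selection in sta_info.c reduces to the following (mirroring the hunk above, for illustration only):

static int first_usable_tid(u8 ignored_acs)
{
	int ac;

	for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++)
		if (!(ignored_acs & ieee80211_ac_to_qos_mask[ac]))
			break;

	/* VO -> TID 7, VI -> TID 5, BE -> TID 3, BK -> TID 1 */
	return 7 - 2 * ac;
}
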
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 6832bf6ab69f..43e45bb660bc 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -527,8 +527,10 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, band);
- if (changed > 0)
+ if (changed > 0) {
+ ieee80211_recalc_min_chandef(sdata);
rate_control_rate_update(local, sband, sta, changed);
+ }
}
void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 9eb0aee9105b..3e3d3014e9ab 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -236,26 +236,35 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+ u8 flags;
u8 *p;
- u8 ack_policy, tid;
if (!ieee80211_is_data_qos(hdr->frame_control))
return;
p = ieee80211_get_qos_ctl(hdr);
- tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
- /* preserve EOSP bit */
- ack_policy = *p & IEEE80211_QOS_CTL_EOSP;
+ /* set up the first byte */
+
+ /*
+ * preserve everything but the TID and ACK policy
+ * (both of which we write here)
+ */
+ flags = *p & ~(IEEE80211_QOS_CTL_TID_MASK |
+ IEEE80211_QOS_CTL_ACK_POLICY_MASK);
if (is_multicast_ether_addr(hdr->addr1) ||
sdata->noack_map & BIT(tid)) {
- ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK;
+ flags |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK;
info->flags |= IEEE80211_TX_CTL_NO_ACK;
}
- /* qos header is 2 bytes */
- *p++ = ack_policy | tid;
+ *p = flags | tid;
+
+ /* set up the second byte */
+ p++;
+
if (ieee80211_vif_is_mesh(&sdata->vif)) {
/* preserve RSPI and Mesh PS Level bit */
*p &= ((IEEE80211_QOS_CTL_RSPI |
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 42ce9bd4426f..8af6dd388d11 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -57,7 +57,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
if (info->control.hw_key &&
(info->flags & IEEE80211_TX_CTL_DONTFRAG ||
- tx->local->ops->set_frag_threshold) &&
+ ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) &&
!(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
/* hwaccel - with no need for SW-generated MMIC */
return TX_CONTINUE;
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 17a51e8389e2..5857bb1e1695 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -3,5 +3,3 @@ mac802154-objs := main.o rx.o tx.o mac_cmd.o mib.o \
iface.o llsec.o util.o cfg.o trace.o
CFLAGS_trace.o := -I$(src)
-
-ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index f9fd0957ab67..7c03fb0ea34c 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -80,11 +80,11 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
if (skb->len > max_sifs_size)
hrtimer_start(&local->ifs_timer,
- ktime_set(0, hw->phy->lifs_period * NSEC_PER_USEC),
+ hw->phy->lifs_period * NSEC_PER_USEC,
HRTIMER_MODE_REL);
else
hrtimer_start(&local->ifs_timer,
- ktime_set(0, hw->phy->sifs_period * NSEC_PER_USEC),
+ hw->phy->sifs_period * NSEC_PER_USEC,
HRTIMER_MODE_REL);
} else {
ieee802154_wake_queue(hw);
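
The ktime_set() removal above follows from ktime_t becoming a plain s64 count of nanoseconds, so ktime_set(0, ns) collapses to ns. A minimal sketch of the resulting idiom (hypothetical wrapper, assuming a relative one-shot timer):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static void arm_ifs_timer(struct hrtimer *timer, u32 period_us)
{
	/* ktime_t is nanoseconds, so the scaled value is passed directly */
	hrtimer_start(timer, (u64)period_us * NSEC_PER_USEC,
		      HRTIMER_MODE_REL);
}
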
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 15fe97644ffe..5b77377e5a15 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -98,18 +98,19 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
}
EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
-static u32 mpls_multipath_hash(struct mpls_route *rt,
- struct sk_buff *skb, bool bos)
+static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
{
struct mpls_entry_decoded dec;
+ unsigned int mpls_hdr_len = 0;
struct mpls_shim_hdr *hdr;
bool eli_seen = false;
int label_index;
u32 hash = 0;
- for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos;
+ for (label_index = 0; label_index < MAX_MP_SELECT_LABELS;
label_index++) {
- if (!pskb_may_pull(skb, sizeof(*hdr) * label_index))
+ mpls_hdr_len += sizeof(*hdr);
+ if (!pskb_may_pull(skb, mpls_hdr_len))
break;
/* Read and decode the current label */
@@ -134,37 +135,38 @@ static u32 mpls_multipath_hash(struct mpls_route *rt,
eli_seen = true;
}
- bos = dec.bos;
- if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index +
- sizeof(struct iphdr))) {
+ if (!dec.bos)
+ continue;
+
+ /* found bottom label; does skb have room for a header? */
+ if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) {
const struct iphdr *v4hdr;
- v4hdr = (const struct iphdr *)(mpls_hdr(skb) +
- label_index);
+ v4hdr = (const struct iphdr *)(hdr + 1);
if (v4hdr->version == 4) {
hash = jhash_3words(ntohl(v4hdr->saddr),
ntohl(v4hdr->daddr),
v4hdr->protocol, hash);
} else if (v4hdr->version == 6 &&
- pskb_may_pull(skb, sizeof(*hdr) * label_index +
- sizeof(struct ipv6hdr))) {
+ pskb_may_pull(skb, mpls_hdr_len +
+ sizeof(struct ipv6hdr))) {
const struct ipv6hdr *v6hdr;
- v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) +
- label_index);
-
+ v6hdr = (const struct ipv6hdr *)(hdr + 1);
hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
hash = jhash_1word(v6hdr->nexthdr, hash);
}
}
+
+ break;
}
return hash;
}
static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
- struct sk_buff *skb, bool bos)
+ struct sk_buff *skb)
{
int alive = ACCESS_ONCE(rt->rt_nhn_alive);
u32 hash = 0;
@@ -180,7 +182,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
if (alive <= 0)
return NULL;
- hash = mpls_multipath_hash(rt, skb, bos);
+ hash = mpls_multipath_hash(rt, skb);
nh_index = hash % alive;
if (alive == rt->rt_nhn)
goto out;
@@ -278,17 +280,11 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
hdr = mpls_hdr(skb);
dec = mpls_entry_decode(hdr);
- /* Pop the label */
- skb_pull(skb, sizeof(*hdr));
- skb_reset_network_header(skb);
-
- skb_orphan(skb);
-
rt = mpls_route_input_rcu(net, dec.label);
if (!rt)
goto drop;
- nh = mpls_select_multipath(rt, skb, dec.bos);
+ nh = mpls_select_multipath(rt, skb);
if (!nh)
goto drop;
@@ -297,6 +293,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
if (!mpls_output_possible(out_dev))
goto drop;
+ /* Pop the label */
+ skb_pull(skb, sizeof(*hdr));
+ skb_reset_network_header(skb);
+
+ skb_orphan(skb);
+
if (skb_warn_if_lro(skb))
goto drop;
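
The rewritten walk above keys purely off the bottom-of-stack (BOS) flag and a running header length instead of a caller-supplied bos hint. The shim-word test it relies on, in a userspace-style sketch (assumes a linear buffer; helper name hypothetical):

#include <arpa/inet.h>
#include <stdint.h>

/* Number of shim headers up to and including the bottom of stack,
 * or 0 if no BOS bit is seen within the inspection window.
 */
static unsigned int mpls_labels_to_bos(const uint32_t *stack,
				       unsigned int max_labels)
{
	unsigned int i;

	for (i = 0; i < max_labels; i++)
		if (ntohl(stack[i]) & 0x100)	/* the S (BOS) bit */
			return i + 1;
	return 0;
}
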
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index cf52cf30ac4b..1d281c1ff7c1 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -133,7 +133,6 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
struct mpls_iptunnel_encap *tun_encap_info;
struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
struct lwtunnel_state *newts;
- int tun_encap_info_len;
int ret;
ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
@@ -144,13 +143,11 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
if (!tb[MPLS_IPTUNNEL_DST])
return -EINVAL;
- tun_encap_info_len = sizeof(*tun_encap_info);
- newts = lwtunnel_state_alloc(tun_encap_info_len);
+ newts = lwtunnel_state_alloc(sizeof(*tun_encap_info));
if (!newts)
return -ENOMEM;
- newts->len = tun_encap_info_len;
tun_encap_info = mpls_lwtunnel_encap(newts);
ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
&tun_encap_info->labels, tun_encap_info->label);
@@ -218,6 +215,7 @@ static const struct lwtunnel_encap_ops mpls_iptun_ops = {
.fill_encap = mpls_fill_encap_info,
.get_encap_size = mpls_encap_nlsize,
.cmp_encap = mpls_encap_cmp,
+ .owner = THIS_MODULE,
};
static int __init mpls_iptunnel_init(void)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e8d56d9a4df2..bbc45f8a7b2d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -57,6 +57,10 @@ config NF_CONNTRACK
config NF_LOG_COMMON
tristate
+config NF_LOG_NETDEV
+ tristate "Netdev packet logging"
+ select NF_LOG_COMMON
+
if NF_CONNTRACK
config NF_CONNTRACK_MARK
@@ -142,38 +146,38 @@ config NF_CONNTRACK_LABELS
to connection tracking entries. It is selected by the connlabel match.
config NF_CT_PROTO_DCCP
- tristate 'DCCP protocol connection tracking support'
+ bool 'DCCP protocol connection tracking support'
depends on NETFILTER_ADVANCED
- default IP_DCCP
+ default y
help
With this option enabled, the layer 3 independent connection
tracking code will be able to do state tracking on DCCP connections.
- If unsure, say 'N'.
+ If unsure, say Y.
config NF_CT_PROTO_GRE
tristate
config NF_CT_PROTO_SCTP
- tristate 'SCTP protocol connection tracking support'
+ bool 'SCTP protocol connection tracking support'
depends on NETFILTER_ADVANCED
- default IP_SCTP
+ default y
help
With this option enabled, the layer 3 independent connection
tracking code will be able to do state tracking on SCTP connections.
- If you want to compile it as a module, say M here and read
- <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+ If unsure, say Y.
config NF_CT_PROTO_UDPLITE
- tristate 'UDP-Lite protocol connection tracking support'
+ bool 'UDP-Lite protocol connection tracking support'
depends on NETFILTER_ADVANCED
+ default y
help
With this option enabled, the layer 3 independent connection
tracking code will be able to do state tracking on UDP-Lite
connections.
- To compile it as a module, choose M here. If unsure, say N.
+ If unsure, say Y.
config NF_CONNTRACK_AMANDA
tristate "Amanda backup protocol support"
@@ -380,17 +384,17 @@ config NF_NAT_NEEDED
default y
config NF_NAT_PROTO_DCCP
- tristate
+ bool
depends on NF_NAT && NF_CT_PROTO_DCCP
default NF_NAT && NF_CT_PROTO_DCCP
config NF_NAT_PROTO_UDPLITE
- tristate
+ bool
depends on NF_NAT && NF_CT_PROTO_UDPLITE
default NF_NAT && NF_CT_PROTO_UDPLITE
config NF_NAT_PROTO_SCTP
- tristate
+ bool
default NF_NAT && NF_CT_PROTO_SCTP
depends on NF_NAT && NF_CT_PROTO_SCTP
select LIBCRC32C
@@ -474,6 +478,12 @@ config NFT_META
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
+config NFT_RT
+ tristate "Netfilter nf_tables routing module"
+ help
+ This option adds the "rt" expression that you can use to match
+ packet routing information such as the packet nexthop.
+
config NFT_NUMGEN
tristate "Netfilter nf_tables number generator module"
help
@@ -484,7 +494,7 @@ config NFT_CT
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
help
- This option adds the "meta" expression that you can use to match
+ This option adds the "ct" expression that you can use to match
connection tracking information such as the flow state.
config NFT_SET_RBTREE
@@ -541,6 +551,12 @@ config NFT_NAT
This option adds the "nat" expression that you can use to perform
typical Network Address Translation (NAT) packet transformations.
+config NFT_OBJREF
+ tristate "Netfilter nf_tables stateful object reference module"
+ help
+ This option adds the "objref" expression that allows you to refer to
+ stateful objects, such as counters and quotas.
+
config NFT_QUEUE
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
@@ -581,6 +597,19 @@ config NFT_HASH
This option adds the "hash" expression that you can use to perform
a hash operation on registers.
+config NFT_FIB
+ tristate
+
+config NFT_FIB_INET
+ depends on NF_TABLES_INET
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables fib inet support"
+ help
+ This option allows using the FIB expression from the inet table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
if NF_TABLES_NETDEV
config NF_DUP_NETDEV
@@ -1409,9 +1438,10 @@ config NETFILTER_XT_MATCH_SOCKET
tristate '"socket" match support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
- depends on !NF_CONNTRACK || NF_CONNTRACK
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
+ depends on NF_SOCKET_IPV4
+ depends on NF_SOCKET_IPV6
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c23c3c84416f..ca30d1960f1d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -5,6 +5,9 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -16,11 +19,7 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
-# SCTP protocol connection tracking
-obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
-obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
-obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
# netlink interface for nf_conntrack
obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
@@ -45,17 +44,20 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
+# NAT protocols (nf_nat)
+nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
+
# generic transport layer logging
obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
+# packet logging for netdev family
+obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
+
obj-$(CONFIG_NF_NAT) += nf_nat.o
obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
-# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
-obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
-
# NAT helpers
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
@@ -81,10 +83,12 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
+obj-$(CONFIG_NFT_RT) += nft_rt.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
+obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
@@ -96,6 +100,8 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_FIB) += nft_fib.o
+obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 004af030ef1a..ce6adfae521a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -102,17 +102,14 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
if (!entry)
return -ENOMEM;
- entry->orig_ops = reg;
- entry->ops = *reg;
- entry->next = NULL;
+ nf_hook_entry_init(entry, reg);
mutex_lock(&nf_hook_mutex);
/* Find the spot in the list */
- while ((p = nf_entry_dereference(*pp)) != NULL) {
- if (reg->priority < p->orig_ops->priority)
+ for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
+ if (reg->priority < nf_hook_entry_priority(p))
break;
- pp = &p->next;
}
rcu_assign_pointer(entry->next, p);
rcu_assign_pointer(*pp, entry);
@@ -139,12 +136,11 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
return;
mutex_lock(&nf_hook_mutex);
- while ((p = nf_entry_dereference(*pp)) != NULL) {
- if (p->orig_ops == reg) {
+ for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
+ if (nf_hook_entry_ops(p) == reg) {
rcu_assign_pointer(*pp, p->next);
break;
}
- pp = &p->next;
}
mutex_unlock(&nf_hook_mutex);
if (!p) {
@@ -302,70 +298,40 @@ void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
}
EXPORT_SYMBOL(_nf_unregister_hooks);
-unsigned int nf_iterate(struct sk_buff *skb,
- struct nf_hook_state *state,
- struct nf_hook_entry **entryp)
+/* Returns 1 if okfn() needs to be executed by the caller,
+ * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+ struct nf_hook_entry *entry)
{
unsigned int verdict;
+ int ret;
- /*
- * The caller must not block between calls to this
- * function because of risk of continuing from deleted element.
- */
- while (*entryp) {
- if (state->thresh > (*entryp)->ops.priority) {
- *entryp = rcu_dereference((*entryp)->next);
- continue;
- }
-
- /* Optimization: we don't need to hold module
- reference here, since function can't sleep. --RR */
-repeat:
- verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
- if (verdict != NF_ACCEPT) {
-#ifdef CONFIG_NETFILTER_DEBUG
- if (unlikely((verdict & NF_VERDICT_MASK)
- > NF_MAX_VERDICT)) {
- NFDEBUG("Evil return from %p(%u).\n",
- (*entryp)->ops.hook, state->hook);
- *entryp = rcu_dereference((*entryp)->next);
+ do {
+ verdict = nf_hook_entry_hookfn(entry, skb, state);
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ entry = rcu_dereference(entry->next);
+ break;
+ case NF_DROP:
+ kfree_skb(skb);
+ ret = NF_DROP_GETERR(verdict);
+ if (ret == 0)
+ ret = -EPERM;
+ return ret;
+ case NF_QUEUE:
+ ret = nf_queue(skb, state, &entry, verdict);
+ if (ret == 1 && entry)
continue;
- }
-#endif
- if (verdict != NF_REPEAT)
- return verdict;
- goto repeat;
+ return ret;
+ default:
+ /* Implicit handling for NF_STOLEN, as well as any other
+ * non-conventional verdicts.
+ */
+ return 0;
}
- *entryp = rcu_dereference((*entryp)->next);
- }
- return NF_ACCEPT;
-}
-
+ } while (entry);
-/* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
-int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
-{
- struct nf_hook_entry *entry;
- unsigned int verdict;
- int ret = 0;
-
- entry = rcu_dereference(state->hook_entries);
-next_hook:
- verdict = nf_iterate(skb, state, &entry);
- if (verdict == NF_ACCEPT || verdict == NF_STOP) {
- ret = 1;
- } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
- kfree_skb(skb);
- ret = NF_DROP_GETERR(verdict);
- if (ret == 0)
- ret = -EPERM;
- } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
- ret = nf_queue(skb, state, &entry, verdict);
- if (ret == 1 && entry)
- goto next_hook;
- }
- return ret;
+ return 1;
}
EXPORT_SYMBOL(nf_hook_slow);
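With nf_iterate() folded into nf_hook_slow(), a caller now passes the first hook entry directly and interprets the return value per the comment above: 1 means run okfn() itself, 0 means the packet was stolen or queued, negative means dropped. A sketch of the expected call pattern under the new signature (the wrapper name is illustrative):

/* Hedged sketch of an nf_hook_slow() caller under the new contract. */
static int hook_caller_sketch(struct net *net, struct sock *sk,
			      struct sk_buff *skb,
			      struct nf_hook_entry *entry,
			      struct nf_hook_state *state,
			      int (*okfn)(struct net *, struct sock *,
					  struct sk_buff *))
{
	int ret;

	rcu_read_lock();		/* nf_hook_slow() requires it */
	ret = nf_hook_slow(skb, state, entry);
	rcu_read_unlock();

	if (ret == 1)			/* all hooks said NF_ACCEPT */
		ret = okfn(net, sk, skb);
	return ret;			/* 0: stolen/queued, <0: dropped */
}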
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 234a8ec82076..4083a8051f0f 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_IPMAC
+ tristate "hash:ip,mac set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,mac set type support, by which
+ you can store IPv4/IPv6 address and MAC (Ethernet address)
+ pairs in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_MAC
tristate "hash:mac set support"
depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 3dbd5e958489..28ec148df02d 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
# hash types
obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
+obj-$(CONFIG_IP_SET_HASH_IPMAC) += ip_set_hash_ipmac.o
obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 2e8e7e5fb4a6..6f09a99298cd 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -22,6 +22,7 @@
#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_memsize IPSET_TOKEN(MTYPE, _memsize)
#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
@@ -40,11 +41,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
{
struct mtype *map = set->data;
- init_timer(&map->gc);
- map->gc.data = (unsigned long)set;
- map->gc.function = gc;
- map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
- add_timer(&map->gc);
+ setup_timer(&map->gc, gc, (unsigned long)set);
+ mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
}
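The same open-coded timer setup is collapsed to this two-call idiom in ip_set_hash_gen.h and ip_set_list_set.c further down. For reference, the replaced sequence and its equivalent, juxtaposed (illustration only, not new code from the patch):

/* Old style: four explicit steps to bind and arm the gc timer. */
init_timer(&map->gc);
map->gc.data     = (unsigned long)set;
map->gc.function = gc;
map->gc.expires  = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);

/* New style: setup_timer() binds function and data, mod_timer()
 * arms it; on an inactive timer mod_timer() behaves like add_timer().
 */
setup_timer(&map->gc, gc, (unsigned long)set);
mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);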
static void
@@ -82,6 +80,16 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
memset(map->members, 0, map->memsize);
+ set->elements = 0;
+ set->ext_size = 0;
+}
+
+/* Calculate the actual memory size of the set data */
+static size_t
+mtype_memsize(const struct mtype *map, size_t dsize)
+{
+ return sizeof(*map) + map->memsize +
+ map->elements * dsize;
}
static int
@@ -89,14 +97,15 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
{
const struct mtype *map = set->data;
struct nlattr *nested;
- size_t memsize = sizeof(*map) + map->memsize;
+ size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (mtype_do_head(skb, map) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -140,6 +149,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (ret == IPSET_ADD_FAILED) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(x, set))) {
+ set->elements--;
ret = 0;
} else if (!(flags & IPSET_FLAG_EXIST)) {
set_bit(e->id, map->members);
@@ -148,6 +158,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
/* Element is re-added, cleanup extensions */
ip_set_ext_destroy(set, x);
}
+ if (ret > 0)
+ set->elements--;
if (SET_WITH_TIMEOUT(set))
#ifdef IP_SET_BITMAP_STORED_TIMEOUT
@@ -159,12 +171,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (SET_WITH_COUNTER(set))
ip_set_init_counter(ext_counter(x, set), ext);
if (SET_WITH_COMMENT(set))
- ip_set_init_comment(ext_comment(x, set), ext);
+ ip_set_init_comment(set, ext_comment(x, set), ext);
if (SET_WITH_SKBINFO(set))
ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
/* Activate element */
set_bit(e->id, map->members);
+ set->elements++;
return 0;
}
@@ -181,6 +194,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
return -IPSET_ERR_EXIST;
ip_set_ext_destroy(set, x);
+ set->elements--;
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(x, set)))
return -IPSET_ERR_EXIST;
@@ -276,6 +290,7 @@ mtype_gc(unsigned long ul_set)
if (ip_set_timeout_expired(ext_timeout(x, set))) {
clear_bit(id, map->members);
ip_set_ext_destroy(set, x);
+ set->elements--;
}
}
spin_unlock_bh(&set->lock);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index a748b0c2c981..c296f9b606d4 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -36,7 +36,7 @@ struct ip_set_net {
bool is_destroyed; /* all sets are destroyed */
};
-static int ip_set_net_id __read_mostly;
+static unsigned int ip_set_net_id __read_mostly;
static inline struct ip_set_net *ip_set_pernet(struct net *net)
{
@@ -324,7 +324,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
-typedef void (*destroyer)(void *);
+typedef void (*destroyer)(struct ip_set *, void *);
/* ipset data extension types, in size order */
const struct ip_set_ext_type ip_set_extensions[] = {
@@ -426,20 +426,20 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
- ext->skbmark = fullmark >> 32;
- ext->skbmarkmask = fullmark & 0xffffffff;
+ ext->skbinfo.skbmark = fullmark >> 32;
+ ext->skbinfo.skbmarkmask = fullmark & 0xffffffff;
}
if (tb[IPSET_ATTR_SKBPRIO]) {
if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
- ext->skbprio = be32_to_cpu(nla_get_be32(
- tb[IPSET_ATTR_SKBPRIO]));
+ ext->skbinfo.skbprio =
+ be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO]));
}
if (tb[IPSET_ATTR_SKBQUEUE]) {
if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
- ext->skbqueue = be16_to_cpu(nla_get_be16(
- tb[IPSET_ATTR_SKBQUEUE]));
+ ext->skbinfo.skbqueue =
+ be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE]));
}
return 0;
}
@@ -541,7 +541,7 @@ int
ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(par->net, index);
+ struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
int ret = 0;
BUG_ON(!set);
@@ -579,7 +579,7 @@ int
ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(par->net, index);
+ struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
int ret;
BUG_ON(!set);
@@ -601,7 +601,7 @@ int
ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(par->net, index);
+ struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
int ret = 0;
BUG_ON(!set);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index d32fd6b036bf..1b05d4a7d5a1 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -85,6 +85,8 @@ struct htable {
};
#define hbucket(h, i) ((h)->bucket[i])
+#define ext_size(n, dsize) \
+ (sizeof(struct hbucket) + (n) * (dsize))
#ifndef IPSET_NET_COUNT
#define IPSET_NET_COUNT 1
@@ -150,24 +152,34 @@ htable_bits(u32 hashsize)
#define INIT_CIDR(cidr, host_mask) \
DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
-#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
-
#ifdef IP_SET_HASH_WITH_NET0
-/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */
-#define NLEN(family) (SET_HOST_MASK(family) + 1)
+/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
+#define NLEN (HOST_MASK + 1)
#define CIDR_POS(c) ((c) - 1)
#else
-/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */
-#define NLEN(family) SET_HOST_MASK(family)
+/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
+#define NLEN HOST_MASK
#define CIDR_POS(c) ((c) - 2)
#endif
#else
-#define NLEN(family) 0
+#define NLEN 0
#endif /* IP_SET_HASH_WITH_NETS */
#endif /* _IP_SET_HASH_GEN_H */
+#ifndef MTYPE
+#error "MTYPE is not defined!"
+#endif
+
+#ifndef HTYPE
+#error "HTYPE is not defined!"
+#endif
+
+#ifndef HOST_MASK
+#error "HOST_MASK is not defined!"
+#endif
+
/* Family dependent templates */
#undef ahash_data
@@ -191,7 +203,6 @@ htable_bits(u32 hashsize)
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt
-#undef mtype
#undef mtype_add
#undef mtype_del
@@ -207,6 +218,7 @@ htable_bits(u32 hashsize)
#undef mtype_variant
#undef mtype_data_match
+#undef htype
#undef HKEY
#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal)
@@ -233,7 +245,6 @@ htable_bits(u32 hashsize)
#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
-#define mtype MTYPE
#define mtype_add IPSET_TOKEN(MTYPE, _add)
#define mtype_del IPSET_TOKEN(MTYPE, _del)
@@ -249,62 +260,54 @@ htable_bits(u32 hashsize)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
-#ifndef MTYPE
-#error "MTYPE is not defined!"
-#endif
-
-#ifndef HOST_MASK
-#error "HOST_MASK is not defined!"
-#endif
-
#ifndef HKEY_DATALEN
#define HKEY_DATALEN sizeof(struct mtype_elem)
#endif
-#define HKEY(data, initval, htable_bits) \
-(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \
- & jhash_mask(htable_bits))
+#define htype MTYPE
-#ifndef htype
-#ifndef HTYPE
-#error "HTYPE is not defined!"
-#endif /* HTYPE */
-#define htype HTYPE
+#define HKEY(data, initval, htable_bits) \
+({ \
+ const u32 *__k = (const u32 *)data; \
+ u32 __l = HKEY_DATALEN / sizeof(u32); \
+ \
+ BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0); \
+ \
+ jhash2(__k, __l, initval) & jhash_mask(htable_bits); \
+})
/* The generic hash structure */
struct htype {
struct htable __rcu *table; /* the hash table */
+ struct timer_list gc; /* garbage collection when timeout enabled */
u32 maxelem; /* max elements in the hash */
- u32 elements; /* current element (vs timeout) */
u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
u32 markmask; /* markmask value for mark mask to store */
#endif
- struct timer_list gc; /* garbage collection when timeout enabled */
- struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
u8 ahash_max; /* max elements in an array block */
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask; /* netmask value for subnets to store */
#endif
+ struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETS
- struct net_prefixes nets[0]; /* book-keeping of prefixes */
+ struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
#endif
};
-#endif /* htype */
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size bookkeeping when the hash stores
 * different-sized networks. cidr == real cidr + 1 to support /0.
*/
static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
+mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
{
int i, j;
/* Add in increasing prefix order, so larger cidr first */
- for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) {
+ for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
if (j != -1) {
continue;
} else if (h->nets[i].cidr[n] < cidr) {
@@ -323,11 +326,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
}
static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
+mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
{
- u8 i, j, net_end = nets_length - 1;
+ u8 i, j, net_end = NLEN - 1;
- for (i = 0; i < nets_length; i++) {
+ for (i = 0; i < NLEN; i++) {
if (h->nets[i].cidr[n] != cidr)
continue;
h->nets[CIDR_POS(cidr)].nets[n]--;
@@ -343,24 +346,9 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
/* Calculate the actual memory size of the set data */
static size_t
-mtype_ahash_memsize(const struct htype *h, const struct htable *t,
- u8 nets_length, size_t dsize)
+mtype_ahash_memsize(const struct htype *h, const struct htable *t)
{
- u32 i;
- struct hbucket *n;
- size_t memsize = sizeof(*h) + sizeof(*t);
-
-#ifdef IP_SET_HASH_WITH_NETS
- memsize += sizeof(struct net_prefixes) * nets_length;
-#endif
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = rcu_dereference_bh(hbucket(t, i));
- if (!n)
- continue;
- memsize += sizeof(struct hbucket) + n->size * dsize;
- }
-
- return memsize;
+ return sizeof(*h) + sizeof(*t);
}
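The per-bucket walk disappears because bucket memory is now accounted incrementally: every bucket allocation, expansion and shrink in mtype_add()/mtype_del()/mtype_expire() adjusts set->ext_size through the new ext_size() macro, and set->elements replaces the per-hash h->elements counter. Reporting the set size thus becomes constant-time; a sketch using the fields this patch introduces:

/* Hedged sketch: O(1) size report after this patch. The static part
 * (struct htype + struct htable) comes from mtype_ahash_memsize(),
 * the dynamic bucket memory from the running set->ext_size total.
 */
static size_t reported_memsize_sketch(const struct htype *h,
				      const struct htable *t,
				      const struct ip_set *set)
{
	return mtype_ahash_memsize(h, t) + set->ext_size;
}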
/* Get the ith element from the array block n */
@@ -398,9 +386,10 @@ mtype_flush(struct ip_set *set)
kfree_rcu(n, rcu);
}
#ifdef IP_SET_HASH_WITH_NETS
- memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
+ memset(h->nets, 0, sizeof(h->nets));
#endif
- h->elements = 0;
+ set->elements = 0;
+ set->ext_size = 0;
}
/* Destroy the hashtable part of the set */
@@ -444,11 +433,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
{
struct htype *h = set->data;
- init_timer(&h->gc);
- h->gc.data = (unsigned long)set;
- h->gc.function = gc;
- h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
- add_timer(&h->gc);
+ setup_timer(&h->gc, gc, (unsigned long)set);
+ mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
pr_debug("gc initialized, run in every %u\n",
IPSET_GC_PERIOD(set->timeout));
}
@@ -473,12 +459,13 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
/* Delete expired elements from the hashtable */
static void
-mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
+mtype_expire(struct ip_set *set, struct htype *h)
{
struct htable *t;
struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
+ size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
@@ -494,21 +481,20 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
continue;
}
data = ahash_data(n, j, dsize);
- if (ip_set_timeout_expired(ext_timeout(data, set))) {
- pr_debug("expired %u/%u\n", i, j);
- clear_bit(j, n->used);
- smp_mb__after_atomic();
+ if (!ip_set_timeout_expired(ext_timeout(data, set)))
+ continue;
+ pr_debug("expired %u/%u\n", i, j);
+ clear_bit(j, n->used);
+ smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
- for (k = 0; k < IPSET_NET_COUNT; k++)
- mtype_del_cidr(h,
- NCIDR_PUT(DCIDR_GET(data->cidr,
- k)),
- nets_length, k);
+ for (k = 0; k < IPSET_NET_COUNT; k++)
+ mtype_del_cidr(h,
+ NCIDR_PUT(DCIDR_GET(data->cidr, k)),
+ k);
#endif
- ip_set_ext_destroy(set, data);
- h->elements--;
- d++;
- }
+ ip_set_ext_destroy(set, data);
+ set->elements--;
+ d++;
}
if (d >= AHASH_INIT_SIZE) {
if (d >= n->size) {
@@ -532,6 +518,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
d++;
}
tmp->pos = d;
+ set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, i), tmp);
kfree_rcu(n, rcu);
}
@@ -546,7 +533,7 @@ mtype_gc(unsigned long ul_set)
pr_debug("called\n");
spin_lock_bh(&set->lock);
- mtype_expire(set, h, NLEN(set->family), set->dsize);
+ mtype_expire(set, h);
spin_unlock_bh(&set->lock);
h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
@@ -563,7 +550,7 @@ mtype_resize(struct ip_set *set, bool retried)
struct htype *h = set->data;
struct htable *t, *orig;
u8 htable_bits;
- size_t dsize = set->dsize;
+ size_t extsize, dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
struct mtype_elem *tmp;
@@ -606,6 +593,7 @@ retry:
/* There can't be another parallel resizing, but dumping is possible */
atomic_set(&orig->ref, 1);
atomic_inc(&orig->uref);
+ extsize = 0;
pr_debug("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
for (i = 0; i < jhash_size(orig->htable_bits); i++) {
@@ -636,6 +624,7 @@ retry:
goto cleanup;
}
m->size = AHASH_INIT_SIZE;
+ extsize = ext_size(AHASH_INIT_SIZE, dsize);
RCU_INIT_POINTER(hbucket(t, key), m);
} else if (m->pos >= m->size) {
struct hbucket *ht;
@@ -655,6 +644,7 @@ retry:
memcpy(ht, m, sizeof(struct hbucket) +
m->size * dsize);
ht->size = m->size + AHASH_INIT_SIZE;
+ extsize += ext_size(AHASH_INIT_SIZE, dsize);
kfree(m);
m = ht;
RCU_INIT_POINTER(hbucket(t, key), ht);
@@ -668,6 +658,7 @@ retry:
}
}
rcu_assign_pointer(h->table, t);
+ set->ext_size = extsize;
spin_unlock_bh(&set->lock);
@@ -715,11 +706,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
bool deleted = false, forceadd = false, reuse = false;
u32 key, multi = 0;
- if (h->elements >= h->maxelem) {
+ if (set->elements >= h->maxelem) {
if (SET_WITH_TIMEOUT(set))
/* FIXME: when set is full, we slow down here */
- mtype_expire(set, h, NLEN(set->family), set->dsize);
- if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+ mtype_expire(set, h);
+ if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
forceadd = true;
}
@@ -727,20 +718,15 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
key = HKEY(value, h->initval, t->htable_bits);
n = __ipset_dereference_protected(hbucket(t, key), 1);
if (!n) {
- if (forceadd) {
- if (net_ratelimit())
- pr_warn("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
- return -IPSET_ERR_HASH_FULL;
- } else if (h->elements >= h->maxelem) {
+ if (forceadd || set->elements >= h->maxelem)
goto set_full;
- }
old = NULL;
n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
GFP_ATOMIC);
if (!n)
return -ENOMEM;
n->size = AHASH_INIT_SIZE;
+ set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
goto copy_elem;
}
for (i = 0; i < n->pos; i++) {
@@ -778,14 +764,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
for (i = 0; i < IPSET_NET_COUNT; i++)
mtype_del_cidr(h,
NCIDR_PUT(DCIDR_GET(data->cidr, i)),
- NLEN(set->family), i);
+ i);
#endif
ip_set_ext_destroy(set, data);
- h->elements--;
+ set->elements--;
}
goto copy_data;
}
- if (h->elements >= h->maxelem)
+ if (set->elements >= h->maxelem)
goto set_full;
/* Create a new slot */
if (n->pos >= n->size) {
@@ -804,17 +790,17 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
memcpy(n, old, sizeof(struct hbucket) +
old->size * set->dsize);
n->size = old->size + AHASH_INIT_SIZE;
+ set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
}
copy_elem:
j = n->pos++;
data = ahash_data(n, j, set->dsize);
copy_data:
- h->elements++;
+ set->elements++;
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)),
- NLEN(set->family), i);
+ mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
#endif
memcpy(data, d, sizeof(struct mtype_elem));
overwrite_extensions:
@@ -824,7 +810,7 @@ overwrite_extensions:
if (SET_WITH_COUNTER(set))
ip_set_init_counter(ext_counter(data, set), ext);
if (SET_WITH_COMMENT(set))
- ip_set_init_comment(ext_comment(data, set), ext);
+ ip_set_init_comment(set, ext_comment(data, set), ext);
if (SET_WITH_SKBINFO(set))
ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
/* Must come last for the case when timed out entry is reused */
@@ -883,11 +869,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
smp_mb__after_atomic();
if (i + 1 == n->pos)
n->pos--;
- h->elements--;
+ set->elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
- NLEN(set->family), j);
+ j);
#endif
ip_set_ext_destroy(set, data);
@@ -896,6 +882,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
k++;
}
if (n->pos == 0 && k == 0) {
+ set->ext_size -= ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, key), NULL);
kfree_rcu(n, rcu);
} else if (k >= AHASH_INIT_SIZE) {
@@ -914,6 +901,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
k++;
}
tmp->pos = k;
+ set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, key), tmp);
kfree_rcu(n, rcu);
}
@@ -957,14 +945,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
int i, j = 0;
#endif
u32 key, multi = 0;
- u8 nets_length = NLEN(set->family);
pr_debug("test by nets\n");
- for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) {
+ for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
mtype_data_reset_elem(d, &orig);
mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
- for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi;
+ for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
k++) {
mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
true);
@@ -1021,7 +1008,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
* try all possible network sizes
*/
for (i = 0; i < IPSET_NET_COUNT; i++)
- if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family))
+ if (DCIDR_GET(d->cidr, i) != HOST_MASK)
break;
if (i == IPSET_NET_COUNT) {
ret = mtype_test_cidrs(set, d, ext, mext, flags);
@@ -1062,7 +1049,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
rcu_read_lock_bh();
t = rcu_dereference_bh_nfnl(h->table);
- memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
+ memsize = mtype_ahash_memsize(h, t) + set->ext_size;
htable_bits = t->htable_bits;
rcu_read_unlock_bh();
@@ -1083,7 +1070,8 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
goto nla_put_failure;
#endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -1238,41 +1226,35 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
struct htype *h;
struct htable *t;
+ pr_debug("Create set %s with family %s\n",
+ set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
+
#ifndef IP_SET_PROTO_UNDEF
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
#endif
-#ifdef IP_SET_HASH_WITH_MARKMASK
- markmask = 0xffffffff;
-#endif
-#ifdef IP_SET_HASH_WITH_NETMASK
- netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
- pr_debug("Create set %s with family %s\n",
- set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
-#endif
-
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
+
#ifdef IP_SET_HASH_WITH_MARKMASK
/* Separated condition in order to avoid directive in argument list */
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
return -IPSET_ERR_PROTOCOL;
-#endif
- if (tb[IPSET_ATTR_HASHSIZE]) {
- hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
- if (hashsize < IPSET_MIMINAL_HASHSIZE)
- hashsize = IPSET_MIMINAL_HASHSIZE;
+ markmask = 0xffffffff;
+ if (tb[IPSET_ATTR_MARKMASK]) {
+ markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
+ if (markmask == 0)
+ return -IPSET_ERR_INVALID_MARKMASK;
}
-
- if (tb[IPSET_ATTR_MAXELEM])
- maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+#endif
#ifdef IP_SET_HASH_WITH_NETMASK
+ netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
if (tb[IPSET_ATTR_NETMASK]) {
netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
@@ -1282,33 +1264,21 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
return -IPSET_ERR_INVALID_NETMASK;
}
#endif
-#ifdef IP_SET_HASH_WITH_MARKMASK
- if (tb[IPSET_ATTR_MARKMASK]) {
- markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
- if (markmask == 0)
- return -IPSET_ERR_INVALID_MARKMASK;
+ if (tb[IPSET_ATTR_HASHSIZE]) {
+ hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+ if (hashsize < IPSET_MIMINAL_HASHSIZE)
+ hashsize = IPSET_MIMINAL_HASHSIZE;
}
-#endif
+
+ if (tb[IPSET_ATTR_MAXELEM])
+ maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
hsize = sizeof(*h);
-#ifdef IP_SET_HASH_WITH_NETS
- hsize += sizeof(struct net_prefixes) * NLEN(set->family);
-#endif
h = kzalloc(hsize, GFP_KERNEL);
if (!h)
return -ENOMEM;
- h->maxelem = maxelem;
-#ifdef IP_SET_HASH_WITH_NETMASK
- h->netmask = netmask;
-#endif
-#ifdef IP_SET_HASH_WITH_MARKMASK
- h->markmask = markmask;
-#endif
- get_random_bytes(&h->initval, sizeof(h->initval));
- set->timeout = IPSET_NO_TIMEOUT;
-
hbits = htable_bits(hashsize);
hsize = htable_size(hbits);
if (hsize == 0) {
@@ -1320,8 +1290,17 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
kfree(h);
return -ENOMEM;
}
+ h->maxelem = maxelem;
+#ifdef IP_SET_HASH_WITH_NETMASK
+ h->netmask = netmask;
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ h->markmask = markmask;
+#endif
+ get_random_bytes(&h->initval, sizeof(h->initval));
+
t->htable_bits = hbits;
- rcu_assign_pointer(h->table, t);
+ RCU_INIT_POINTER(h->table, t);
set->data = h;
#ifndef IP_SET_PROTO_UNDEF
@@ -1339,6 +1318,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
#endif
+ set->timeout = IPSET_NO_TIMEOUT;
if (tb[IPSET_ATTR_TIMEOUT]) {
set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
#ifndef IP_SET_PROTO_UNDEF
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 9d6bf19f7b78..20bfbd315f61 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -82,7 +82,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ip *h = set->data;
+ const struct hash_ip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -101,7 +101,7 @@ static int
hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ip *h = set->data;
+ const struct hash_ip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -199,7 +199,7 @@ nla_put_failure:
}
static inline void
-hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e)
+hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
{
}
@@ -217,7 +217,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ip *h = set->data;
+ const struct hash_ip6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip6_elem e = { { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -234,7 +234,7 @@ static int
hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ip *h = set->data;
+ const struct hash_ip6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip6_elem e = { { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
new file mode 100644
index 000000000000..1ab5ed2f6839
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -0,0 +1,315 @@
+/* Copyright (C) 2016 Tomasz Chilinski <tomasz.chilinski@chilan.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <linux/if_ether.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>");
+IP_SET_MODULE_DESC("hash:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,mac");
+
+/* Type specific function prefix */
+#define HTYPE hash_ipmac
+
+/* Zero valued element is not supported */
+static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipmac4_elem {
+ /* Zero valued IP addresses cannot be stored */
+ __be32 ip;
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2];
+ };
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
+ const struct hash_ipmac4_elem *e2,
+ u32 *multi)
+{
+ return e1->ip == e2->ip && ether_addr_equal(e1->ether, e2->ether);
+}
+
+static bool
+hash_ipmac4_data_list(struct sk_buff *skb, const struct hash_ipmac4_elem *e)
+{
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip) ||
+ nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static inline void
+hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
+ const struct hash_ipmac4_elem *e)
+{
+ next->ip = e->ip;
+}
+
+#define MTYPE hash_ipmac4
+#define PF 4
+#define HOST_MASK 32
+#define HKEY_DATALEN sizeof(struct hash_ipmac4_elem)
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ /* MAC can be src only */
+ if (!(opt->flags & IPSET_DIM_TWO_SRC))
+ return 0;
+
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ if (ether_addr_equal(e.ether, invalid_ether))
+ return -EINVAL;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !tb[IPSET_ATTR_ETHER] ||
+ nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ if (ether_addr_equal(e.ether, invalid_ether))
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+/* IPv6 variant */
+
+/* Member elements */
+struct hash_ipmac6_elem {
+ /* Zero valued IP addresses cannot be stored */
+ union nf_inet_addr ip;
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2];
+ };
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
+ const struct hash_ipmac6_elem *e2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&e1->ip.in6, &e2->ip.in6) &&
+ ether_addr_equal(e1->ether, e2->ether);
+}
+
+static bool
+hash_ipmac6_data_list(struct sk_buff *skb, const struct hash_ipmac6_elem *e)
+{
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
+ nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static inline void
+hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
+ const struct hash_ipmac6_elem *e)
+{
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+#undef HKEY_DATALEN
+
+#define MTYPE hash_ipmac6
+#define PF 6
+#define HOST_MASK 128
+#define HKEY_DATALEN sizeof(struct hash_ipmac6_elem)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmac6_elem e = {
+ { .all = { 0 } },
+ { .foo[0] = 0, .foo[1] = 0 }
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ /* MAC can be src only */
+ if (!(opt->flags & IPSET_DIM_TWO_SRC))
+ return 0;
+
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ if (ether_addr_equal(e.ether, invalid_ether))
+ return -EINVAL;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmac6_elem e = {
+ { .all = { 0 } },
+ { .foo[0] = 0, .foo[1] = 0 }
+ };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !tb[IPSET_ATTR_ETHER] ||
+ nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ if (ether_addr_equal(e.ether, invalid_ether))
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_ipmac_type __read_mostly = {
+ .name = "hash:ip,mac",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_MAC,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_ipmac_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ipmac_init(void)
+{
+ return ip_set_type_register(&hash_ipmac_type);
+}
+
+static void __exit
+hash_ipmac_fini(void)
+{
+ ip_set_type_unregister(&hash_ipmac_type);
+}
+
+module_init(hash_ipmac_init);
+module_exit(hash_ipmac_fini);
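One non-obvious detail of the element layouts in this new file: the anonymous union pads the 6-byte MAC to 8 bytes, which keeps HKEY_DATALEN a multiple of sizeof(u32) and so satisfies the BUILD_BUG_ON added to HKEY() earlier in this patch. Equivalent compile-time checks, shown only as illustration (they are not part of the patch and would live at function scope, e.g. in hash_ipmac_init()):

/* Illustrative: without the __be32 foo[2] padding the IPv4 element
 * would be 10 bytes, HKEY_DATALEN / sizeof(u32) would truncate to 2
 * and jhash2() would silently ignore the last two MAC bytes; the
 * BUILD_BUG_ON in HKEY() now rejects such layouts at build time.
 */
BUILD_BUG_ON(sizeof(struct hash_ipmac4_elem) % sizeof(u32) != 0);
BUILD_BUG_ON(sizeof(struct hash_ipmac6_elem) % sizeof(u32) != 0);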
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index a0695a2ab585..b64cf14e8352 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -85,7 +85,7 @@ hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ipmark *h = set->data;
+ const struct hash_ipmark4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -101,7 +101,7 @@ static int
hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipmark *h = set->data;
+ const struct hash_ipmark4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -193,7 +193,7 @@ nla_put_failure:
}
static inline void
-hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
+hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
const struct hash_ipmark6_elem *d)
{
}
@@ -211,7 +211,7 @@ hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ipmark *h = set->data;
+ const struct hash_ipmark6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -227,7 +227,7 @@ static int
hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipmark *h = set->data;
+ const struct hash_ipmark6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 9d84b3dff603..f438740e6c6a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -108,7 +108,7 @@ static int
hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipport *h = set->data;
+ const struct hash_ipport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -231,7 +231,7 @@ nla_put_failure:
}
static inline void
-hash_ipport6_data_next(struct hash_ipport4_elem *next,
+hash_ipport6_data_next(struct hash_ipport6_elem *next,
const struct hash_ipport6_elem *d)
{
next->port = d->port;
@@ -266,7 +266,7 @@ static int
hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipport *h = set->data;
+ const struct hash_ipport6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 215b7b942038..6215fb898c50 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -111,7 +111,7 @@ static int
hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportip *h = set->data;
+ const struct hash_ipportip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -241,7 +241,7 @@ nla_put_failure:
}
static inline void
-hash_ipportip6_data_next(struct hash_ipportip4_elem *next,
+hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
const struct hash_ipportip6_elem *d)
{
next->port = d->port;
@@ -277,7 +277,7 @@ static int
hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportip *h = set->data;
+ const struct hash_ipportip6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 9ca719625ea3..5ab1b99a53c2 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -138,7 +138,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ipportnet *h = set->data;
+ const struct hash_ipportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = {
.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -163,7 +163,7 @@ static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportnet *h = set->data;
+ const struct hash_ipportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -370,7 +370,7 @@ nla_put_failure:
}
static inline void
-hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next,
+hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
const struct hash_ipportnet6_elem *d)
{
next->port = d->port;
@@ -389,7 +389,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ipportnet *h = set->data;
+ const struct hash_ipportnet6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet6_elem e = {
.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -414,7 +414,7 @@ static int
hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportnet *h = set->data;
+ const struct hash_ipportnet6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 3e4bffdc1cc0..5d9e895452e7 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -117,7 +117,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_net *h = set->data;
+ const struct hash_net4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = {
.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -139,7 +139,7 @@ static int
hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_net *h = set->data;
+ const struct hash_net4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -268,7 +268,7 @@ nla_put_failure:
}
static inline void
-hash_net6_data_next(struct hash_net4_elem *next,
+hash_net6_data_next(struct hash_net6_elem *next,
const struct hash_net6_elem *d)
{
}
@@ -286,7 +286,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_net *h = set->data;
+ const struct hash_net6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net6_elem e = {
.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index f0f688db6213..44cf11939c91 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -156,7 +156,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- struct hash_netiface *h = set->data;
+ struct hash_netiface4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = {
.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -170,7 +170,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
e.ip &= ip_set_netmask(e.cidr);
-#define IFACE(dir) (par->dir ? par->dir->name : "")
+#define IFACE(dir) (par->state->dir ? par->state->dir->name : "")
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
@@ -196,7 +196,7 @@ static int
hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- struct hash_netiface *h = set->data;
+ struct hash_netiface4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -348,7 +348,7 @@ nla_put_failure:
}
static inline void
-hash_netiface6_data_next(struct hash_netiface4_elem *next,
+hash_netiface6_data_next(struct hash_netiface6_elem *next,
const struct hash_netiface6_elem *d)
{
}
@@ -367,7 +367,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- struct hash_netiface *h = set->data;
+ struct hash_netiface6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem e = {
.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index a93dfebffa81..db614e13b193 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -143,7 +143,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_netnet *h = set->data;
+ const struct hash_netnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -165,7 +165,7 @@ static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netnet *h = set->data;
+ const struct hash_netnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -352,7 +352,7 @@ nla_put_failure:
}
static inline void
-hash_netnet6_data_next(struct hash_netnet4_elem *next,
+hash_netnet6_data_next(struct hash_netnet6_elem *next,
const struct hash_netnet6_elem *d)
{
}
@@ -377,7 +377,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_netnet *h = set->data;
+ const struct hash_netnet6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 731813e0f08c..54b64b6cd0cd 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -133,7 +133,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_netport *h = set->data;
+ const struct hash_netport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = {
.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -157,7 +157,7 @@ static int
hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netport *h = set->data;
+ const struct hash_netport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -329,7 +329,7 @@ nla_put_failure:
}
static inline void
-hash_netport6_data_next(struct hash_netport4_elem *next,
+hash_netport6_data_next(struct hash_netport6_elem *next,
const struct hash_netport6_elem *d)
{
next->port = d->port;
@@ -348,7 +348,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_netport *h = set->data;
+ const struct hash_netport6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport6_elem e = {
.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -372,7 +372,7 @@ static int
hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netport *h = set->data;
+ const struct hash_netport6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 9a14c237830f..aff846960ac4 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -154,7 +154,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_netportnet *h = set->data;
+ const struct hash_netportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -180,7 +180,7 @@ static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netportnet *h = set->data;
+ const struct hash_netportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -406,7 +406,7 @@ nla_put_failure:
}
static inline void
-hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
+hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
const struct hash_netportnet6_elem *d)
{
next->port = d->port;
@@ -432,7 +432,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_netportnet *h = set->data;
+ const struct hash_netportnet6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -458,7 +458,7 @@ static int
hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netportnet *h = set->data;
+ const struct hash_netportnet6 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a2a89e4e0a14..51077c53d76b 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -166,6 +166,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
static inline void
list_set_del(struct ip_set *set, struct set_elem *e)
{
+ set->elements--;
list_del_rcu(&e->list);
call_rcu(&e->rcu, __list_set_del_rcu);
}
@@ -227,7 +228,7 @@ list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext,
if (SET_WITH_COUNTER(set))
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
- ip_set_init_comment(ext_comment(e, set), ext);
+ ip_set_init_comment(set, ext_comment(e, set), ext);
if (SET_WITH_SKBINFO(set))
ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
/* Update timeout last */
@@ -309,6 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
list_add_rcu(&e->list, &prev->list);
else
list_add_tail_rcu(&e->list, &map->members);
+ set->elements++;
return 0;
}
@@ -419,6 +421,8 @@ list_set_flush(struct ip_set *set)
list_for_each_entry_safe(e, n, &map->members, list)
list_set_del(set, e);
+ set->elements = 0;
+ set->ext_size = 0;
}
static void
@@ -441,12 +445,12 @@ list_set_destroy(struct ip_set *set)
set->data = NULL;
}
-static int
-list_set_head(struct ip_set *set, struct sk_buff *skb)
+/* Calculate the actual memory size of the set data */
+static size_t
+list_set_memsize(const struct list_set *map, size_t dsize)
{
- const struct list_set *map = set->data;
- struct nlattr *nested;
struct set_elem *e;
+ size_t memsize;
u32 n = 0;
rcu_read_lock();
@@ -454,13 +458,25 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
n++;
rcu_read_unlock();
+ memsize = sizeof(*map) + n * dsize;
+
+ return memsize;
+}
+
+static int
+list_set_head(struct ip_set *set, struct sk_buff *skb)
+{
+ const struct list_set *map = set->data;
+ struct nlattr *nested;
+ size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size;
+
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) + n * set->dsize)))
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -570,11 +586,8 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
{
struct list_set *map = set->data;
- init_timer(&map->gc);
- map->gc.data = (unsigned long)set;
- map->gc.function = gc;
- map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
- add_timer(&map->gc);
+ setup_timer(&map->gc, gc, (unsigned long)set);
+ mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
}
/* Create list:set type of sets */
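
The list_set gc conversion just above swaps the open-coded init_timer() sequence for the setup_timer()/mod_timer() helpers. For an inactive timer the two forms are equivalent; the helper form is also safe if the timer happens to be pending already, which add_timer() is not. A minimal sketch of the correspondence (illustrative, not part of the patch):

	/* old, open-coded form: timer must not be pending */
	init_timer(&t);
	t.data = data;
	t.function = fn;
	t.expires = jiffies + delay;
	add_timer(&t);

	/* helper form: same effect, and mod_timer() also handles
	 * the case where the timer is already queued.
	 */
	setup_timer(&t, fn, data);
	mod_timer(&t, jiffies + delay);
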
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 2c1b498a7a27..db40050f8785 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -70,7 +70,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
EXPORT_SYMBOL(ip_vs_new_conn_out);
-static int ip_vs_net_id __read_mostly;
+static unsigned int ip_vs_net_id __read_mostly;
/* netns cnt used for uniqueness */
static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a6e44ef2ec9a..55e0169caa4c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -48,7 +48,7 @@
#include <net/sock.h>
#include <net/genetlink.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/ip_vs.h>
@@ -2840,14 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
*/
/* IPVS genetlink family */
-static struct genl_family ip_vs_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = IPVS_GENL_NAME,
- .version = IPVS_GENL_VERSION,
- .maxattr = IPVS_CMD_ATTR_MAX,
- .netnsok = true, /* Make ipvsadm to work on netns */
-};
+static struct genl_family ip_vs_genl_family;
/* Policy used for first-level command attributes */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
@@ -3267,7 +3260,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
- if (IS_ERR(svc) || svc == NULL)
+ if (IS_ERR_OR_NULL(svc))
goto out_err;
/* Dump the destinations */
@@ -3872,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = {
},
};
+static struct genl_family ip_vs_genl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = IPVS_GENL_NAME,
+ .version = IPVS_GENL_VERSION,
+ .maxattr = IPVS_CMD_ATTR_MAX,
+ .netnsok = true, /* Make ipvsadm work with netns */
+ .module = THIS_MODULE,
+ .ops = ip_vs_genl_ops,
+ .n_ops = ARRAY_SIZE(ip_vs_genl_ops),
+};
+
static int __init ip_vs_genl_register(void)
{
- return genl_register_family_with_ops(&ip_vs_genl_family,
- ip_vs_genl_ops);
+ return genl_register_family(&ip_vs_genl_family);
}
static void ip_vs_genl_unregister(void)
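
The ip_vs_genl_family rework tracks the genetlink API change in this cycle: GENL_ID_GENERATE is gone (families always receive a dynamically assigned id), the ops array and owning module move into the family structure, and registration collapses to a single genl_register_family() call. The resulting pattern, sketched for a hypothetical family "foo" (names illustrative):

	static const struct genl_ops foo_genl_ops[] = {
		/* ... command handlers ... */
	};

	static struct genl_family foo_genl_family __ro_after_init = {
		.name		= "foo",
		.version	= 1,
		.maxattr	= FOO_ATTR_MAX,
		.module		= THIS_MODULE,
		.ops		= foo_genl_ops,
		.n_ops		= ARRAY_SIZE(foo_genl_ops),
	};

	/* at module init */
	err = genl_register_family(&foo_genl_family);
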
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 01d3d894de46..4e1a98fcc8c3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -254,6 +254,54 @@ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
return true;
}
+static inline bool decrement_ttl(struct netns_ipvs *ipvs,
+ int skb_af,
+ struct sk_buff *skb)
+{
+ struct net *net = ipvs->net;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ struct dst_entry *dst = skb_dst(skb);
+
+ /* check and decrement ttl */
+ if (ipv6_hdr(skb)->hop_limit <= 1) {
+ /* Force OUTPUT device used as source address */
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+ ICMPV6_EXC_HOPLIMIT, 0);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
+
+ return false;
+ }
+
+ /* don't propagate ttl change to cloned packets */
+ if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ return false;
+
+ ipv6_hdr(skb)->hop_limit--;
+ } else
+#endif
+ {
+ if (ip_hdr(skb)->ttl <= 1) {
+ /* Tell the sender its packet died... */
+ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
+ icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+ return false;
+ }
+
+ /* don't propagate ttl change to cloned packets */
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ return false;
+
+ /* Decrease ttl */
+ ip_decrease_ttl(ip_hdr(skb));
+ }
+
+ return true;
+}
+
/* Get route to destination or remote server */
static int
__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
@@ -326,6 +374,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
return local;
}
+ if (!decrement_ttl(ipvs, skb_af, skb))
+ goto err_put;
+
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
mtu = dst_mtu(&rt->dst);
} else {
@@ -473,6 +524,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
return local;
}
+ if (!decrement_ttl(ipvs, skb_af, skb))
+ goto err_put;
+
/* MTU checking */
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
mtu = dst_mtu(&rt->dst);
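
In the IPv4 branch of decrement_ttl() above, ip_decrease_ttl() also patches the header checksum incrementally (RFC 1141 style) rather than recomputing it over the whole header; for reference, its include/net/ip.h definition is roughly:

	static inline void ip_decrease_ttl(struct iphdr *iph)
	{
		u32 check = (__force u32)iph->check;

		check += (__force u32)htons(0x0100);
		iph->check = (__force __sum16)(check + (check >= 0xFFFF));
		--iph->ttl;
	}
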
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0f87e5d21be7..4e8083c5e01d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -85,11 +85,11 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV 64u
-/* upper bound of scan intervals */
-#define GC_INTERVAL_MAX (2 * HZ)
-/* maximum conntracks to evict per gc run */
-#define GC_MAX_EVICTS 256u
+#define GC_MAX_BUCKETS_DIV 128u
+/* upper bound of full table scan */
+#define GC_MAX_SCAN_JIFFIES (16u * HZ)
+/* desired ratio of entries found to be expired */
+#define GC_EVICT_RATIO 50u
static struct conntrack_gc_work conntrack_gc_work;
@@ -783,7 +783,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* set conntrack timestamp, if enabled. */
tstamp = nf_conn_tstamp_find(ct);
if (tstamp) {
- if (skb->tstamp.tv64 == 0)
+ if (skb->tstamp == 0)
__net_timestamp(skb);
tstamp->start = ktime_to_ns(skb->tstamp);
@@ -938,6 +938,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
static void gc_worker(struct work_struct *work)
{
+ unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
unsigned int i, goal, buckets = 0, expired_count = 0;
struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
@@ -979,8 +980,7 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched_rcu_qs();
- } while (++buckets < goal &&
- expired_count < GC_MAX_EVICTS);
+ } while (++buckets < goal);
if (gc_work->exiting)
return;
@@ -997,27 +997,25 @@ static void gc_worker(struct work_struct *work)
* 1. Minimize time until we notice a stale entry
* 2. Maximize scan intervals to not waste cycles
*
- * Normally, expired_count will be 0, this increases the next_run time
- * to priorize 2) above.
+ * Normally, the ratio of expired entries will be close to 0.
*
- * As soon as a timed-out entry is found, move towards 1) and increase
- * the scan frequency.
- * In case we have lots of evictions next scan is done immediately.
+ * As soon as a sizeable fraction of the entries has expired,
+ * increase the scan frequency.
*/
ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
- gc_work->next_gc_run = 0;
- next_run = 0;
- } else if (expired_count) {
- gc_work->next_gc_run /= 2U;
- next_run = msecs_to_jiffies(1);
+ if (ratio > GC_EVICT_RATIO) {
+ gc_work->next_gc_run = min_interval;
} else {
- if (gc_work->next_gc_run < GC_INTERVAL_MAX)
- gc_work->next_gc_run += msecs_to_jiffies(1);
+ unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
- next_run = gc_work->next_gc_run;
+ BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
+
+ gc_work->next_gc_run += min_interval;
+ if (gc_work->next_gc_run > max)
+ gc_work->next_gc_run = max;
}
+ next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
@@ -1025,7 +1023,7 @@ static void gc_worker(struct work_struct *work)
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
- gc_work->next_gc_run = GC_INTERVAL_MAX;
+ gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
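
Taken together, the new constants turn the gc interval into a clamped linear back-off: every mostly-idle pass adds HZ/GC_MAX_BUCKETS_DIV jiffies, capped so that a full table scan never stretches past GC_MAX_SCAN_JIFFIES, while a pass where more than GC_EVICT_RATIO percent of the scanned entries had expired snaps the interval back to the minimum. A small user-space sketch of the schedule (HZ assumed to be 250 purely for illustration):

	#include <stdio.h>

	#define HZ			250u	/* assumed, for illustration only */
	#define GC_MAX_BUCKETS_DIV	128u
	#define GC_MAX_SCAN_JIFFIES	(16u * HZ)
	#define GC_EVICT_RATIO		50u

	static unsigned int next_gc_run = HZ;	/* initial value, as in conntrack_gc_work_init() */

	/* mirrors the interval update at the end of gc_worker() */
	static unsigned int gc_step(unsigned int ratio)
	{
		unsigned int min_interval = HZ / GC_MAX_BUCKETS_DIV ?
					    HZ / GC_MAX_BUCKETS_DIV : 1;
		unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;

		if (ratio > GC_EVICT_RATIO) {
			next_gc_run = min_interval;	/* busy: rescan soon */
		} else {
			next_gc_run += min_interval;	/* idle: back off linearly... */
			if (next_gc_run > max)
				next_gc_run = max;	/* ...up to the scan-time cap */
		}
		return next_gc_run;
	}

	int main(void)
	{
		int i;

		for (i = 0; i < 20; i++)
			printf("idle pass %2d: next run in %u jiffies\n", i, gc_step(0));
		printf("busy pass:   next run in %u jiffies\n", gc_step(90));
		return 0;
	}
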
@@ -1338,7 +1336,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
if (skb->nfct)
goto out;
}
-
+repeat:
ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
l3proto, l4proto, &set_reply, &ctinfo);
if (!ct) {
@@ -1370,6 +1368,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
NF_CT_STAT_INC_ATOMIC(net, invalid);
if (ret == -NF_DROP)
NF_CT_STAT_INC_ATOMIC(net, drop);
+ /* Special case: TCP tracker reports an attempt to reopen a
+ * closed/aborted connection. We have to go back and create a
+ * fresh conntrack.
+ */
+ if (ret == -NF_REPEAT)
+ goto repeat;
ret = -ret;
goto out;
}
@@ -1377,15 +1381,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
- if (tmpl) {
- /* Special case: we have to repeat this hook, assign the
- * template again to this packet. We assume that this packet
- * has no conntrack assigned. This is used by nf_ct_tcp. */
- if (ret == NF_REPEAT)
- skb->nfct = (struct nf_conntrack *)tmpl;
- else
- nf_ct_put(tmpl);
- }
+ if (tmpl)
+ nf_ct_put(tmpl);
return ret;
}
@@ -1918,7 +1915,7 @@ int nf_conntrack_init_start(void)
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
conntrack_gc_work_init(&conntrack_gc_work);
- queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
+ queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
return 0;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 8d2c7d8c666a..2d6ee1803415 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -125,6 +125,54 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ const struct nf_conntrack_l3proto *l3proto;
+ int ret;
+
+ might_sleep();
+
+ ret = nf_ct_l3proto_try_module_get(nfproto);
+ if (ret < 0)
+ return ret;
+
+ /* we already have a reference, can't fail */
+ rcu_read_lock();
+ l3proto = __nf_ct_l3proto_find(nfproto);
+ rcu_read_unlock();
+
+ if (!l3proto->net_ns_get)
+ return 0;
+
+ ret = l3proto->net_ns_get(net);
+ if (ret < 0)
+ nf_ct_l3proto_module_put(nfproto);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_get);
+
+void nf_ct_netns_put(struct net *net, u8 nfproto)
+{
+ const struct nf_conntrack_l3proto *l3proto;
+
+ might_sleep();
+
+ /* same as nf_ct_netns_get(), reference assumed */
+ rcu_read_lock();
+ l3proto = __nf_ct_l3proto_find(nfproto);
+ rcu_read_unlock();
+
+ if (WARN_ON(!l3proto))
+ return;
+
+ if (l3proto->net_ns_put)
+ l3proto->net_ns_put(net);
+
+ nf_ct_l3proto_module_put(nfproto);
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_put);
+
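
nf_ct_netns_get()/nf_ct_netns_put() give conntrack users (xtables matches and targets, nft expressions) a per-namespace reference, so the l3 tracker can register its hooks only in namespaces whose ruleset actually needs them. A hypothetical match would pair them like this (names illustrative):

	static int foo_mt_check(const struct xt_mtchk_param *par)
	{
		/* grab conntrack for this netns; may register hooks */
		return nf_ct_netns_get(par->net, par->family);
	}

	static void foo_mt_destroy(const struct xt_mtdtor_param *par)
	{
		/* drop the reference; hooks can go away with the last user */
		nf_ct_netns_put(par->net, par->family);
	}
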
struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
@@ -190,20 +238,19 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
+#ifdef CONFIG_SYSCTL
+extern unsigned int nf_conntrack_default_on;
+
int nf_ct_l3proto_pernet_register(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- int ret;
-
- if (proto->init_net) {
- ret = proto->init_net(net);
- if (ret < 0)
- return ret;
- }
+ if (nf_conntrack_default_on == 0)
+ return 0;
- return 0;
+ return proto->net_ns_get ? proto->net_ns_get(net) : 0;
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
+#endif
void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{
@@ -224,6 +271,16 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
void nf_ct_l3proto_pernet_unregister(struct net *net,
struct nf_conntrack_l3proto *proto)
{
+ /*
+ * nf_conntrack_default_on *might* have registered hooks.
+ * ->net_ns_put must cope with more put() than get() calls, i.e.
+ * if nf_conntrack_default_on was 0 at the time of the
+ * nf_ct_l3proto_pernet_register invocation, this net_ns_put()
+ * should be a no-op.
+ */
+ if (proto->net_ns_put)
+ proto->net_ns_put(net);
+
/* Remove all conntrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
@@ -281,15 +338,15 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
/* FIXME: Allow NULL functions and sub in pointers to generic for
them. --RR */
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
if (l4proto->l3proto >= PF_MAX)
return -EBUSY;
- if ((l4proto->to_nlattr && !l4proto->nlattr_size)
- || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
+ if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
+ (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
return -EINVAL;
mutex_lock(&nf_ct_proto_mutex);
@@ -307,7 +364,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
}
for (i = 0; i < MAX_NF_CT_PROTO; i++)
- RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
+ RCU_INIT_POINTER(proto_array[i],
+ &nf_conntrack_l4proto_generic);
/* Before making proto_array visible to lockless readers,
* we must make sure its content is committed to memory.
@@ -335,10 +393,10 @@ out_unlock:
mutex_unlock(&nf_ct_proto_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
-int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_pernet_register_one(struct net *net,
+ struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nf_proto_net *pn = NULL;
@@ -361,9 +419,9 @@ int nf_ct_l4proto_pernet_register(struct net *net,
out:
return ret;
}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= PF_MAX);
@@ -378,10 +436,10 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
synchronize_rcu();
}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
-void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_pernet_unregister_one(struct net *net,
+ struct nf_conntrack_l4proto *l4proto)
{
struct nf_proto_net *pn = NULL;
@@ -395,6 +453,66 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
/* Remove all conntrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
+
+int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
+ unsigned int num_proto)
+{
+ int ret = -EINVAL, ver;
+ unsigned int i;
+
+ for (i = 0; i < num_proto; i++) {
+ ret = nf_ct_l4proto_register_one(l4proto[i]);
+ if (ret < 0)
+ break;
+ }
+ if (i != num_proto) {
+ ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
+ pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
+ ver, l4proto[i]->name, ver);
+ nf_ct_l4proto_unregister(l4proto, i);
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
+
+int nf_ct_l4proto_pernet_register(struct net *net,
+ struct nf_conntrack_l4proto *l4proto[],
+ unsigned int num_proto)
+{
+ int ret = -EINVAL;
+ unsigned int i;
+
+ for (i = 0; i < num_proto; i++) {
+ ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
+ if (ret < 0)
+ break;
+ }
+ if (i != num_proto) {
+ pr_err("nf_conntrack_%s%d: pernet registration failed\n",
+ l4proto[i]->name,
+ l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
+ nf_ct_l4proto_pernet_unregister(net, l4proto, i);
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
+
+void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
+ unsigned int num_proto)
+{
+ while (num_proto-- != 0)
+ nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
+
+void nf_ct_l4proto_pernet_unregister(struct net *net,
+ struct nf_conntrack_l4proto *l4proto[],
+ unsigned int num_proto)
+{
+ while (num_proto-- != 0)
+ nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
+}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
int nf_conntrack_proto_pernet_init(struct net *net)
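
With the *_one() split, callers that own several trackers can register them as an array and get unwind-on-failure for free. A sketch of such a caller (array contents illustrative):

	static struct nf_conntrack_l4proto *builtin_l4proto[] = {
		&nf_conntrack_l4proto_dccp4,
		&nf_conntrack_l4proto_dccp6,
		&nf_conntrack_l4proto_sctp4,
		&nf_conntrack_l4proto_sctp6,
	};

	err = nf_ct_l4proto_register(builtin_l4proto,
				     ARRAY_SIZE(builtin_l4proto));
	if (err < 0)
		return err;	/* entries registered so far were unwound */
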
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index a45bee52dccc..b68ce6ac13b3 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -9,7 +9,6 @@
*
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/spinlock.h>
@@ -384,17 +383,9 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
},
};
-/* this module per-net specifics */
-static int dccp_net_id __read_mostly;
-struct dccp_net {
- struct nf_proto_net pn;
- int dccp_loose;
- unsigned int dccp_timeout[CT_DCCP_MAX + 1];
-};
-
-static inline struct dccp_net *dccp_pernet(struct net *net)
+static inline struct nf_dccp_net *dccp_pernet(struct net *net)
{
- return net_generic(net, dccp_net_id);
+ return &net->ct.nf_ct_proto.dccp;
}
static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -424,7 +415,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
- struct dccp_net *dn;
+ struct nf_dccp_net *dn;
struct dccp_hdr _dh, *dh;
const char *msg;
u_int8_t state;
@@ -719,7 +710,7 @@ static int dccp_nlattr_size(void)
static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- struct dccp_net *dn = dccp_pernet(net);
+ struct nf_dccp_net *dn = dccp_pernet(net);
unsigned int *timeouts = data;
int i;
@@ -820,7 +811,7 @@ static struct ctl_table dccp_sysctl_table[] = {
#endif /* CONFIG_SYSCTL */
static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
- struct dccp_net *dn)
+ struct nf_dccp_net *dn)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table)
@@ -850,7 +841,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
static int dccp_init_net(struct net *net, u_int16_t proto)
{
- struct dccp_net *dn = dccp_pernet(net);
+ struct nf_dccp_net *dn = dccp_pernet(net);
struct nf_proto_net *pn = &dn->pn;
if (!pn->users) {
@@ -868,7 +859,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto)
return dccp_kmemdup_sysctl_table(net, pn, dn);
}
-static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
.name = "dccp",
@@ -898,11 +889,11 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
.nla_policy = dccp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .net_id = &dccp_net_id,
.init_net = dccp_init_net,
};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
-static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
.name = "dccp",
@@ -932,78 +923,6 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
.nla_policy = dccp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .net_id = &dccp_net_id,
.init_net = dccp_init_net,
};
-
-static __net_init int dccp_net_init(struct net *net)
-{
- int ret = 0;
- ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4);
- if (ret < 0) {
- pr_err("nf_conntrack_dccp4: pernet registration failed.\n");
- goto out;
- }
- ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6);
- if (ret < 0) {
- pr_err("nf_conntrack_dccp6: pernet registration failed.\n");
- goto cleanup_dccp4;
- }
- return 0;
-cleanup_dccp4:
- nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
-out:
- return ret;
-}
-
-static __net_exit void dccp_net_exit(struct net *net)
-{
- nf_ct_l4proto_pernet_unregister(net, &dccp_proto6);
- nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
-}
-
-static struct pernet_operations dccp_net_ops = {
- .init = dccp_net_init,
- .exit = dccp_net_exit,
- .id = &dccp_net_id,
- .size = sizeof(struct dccp_net),
-};
-
-static int __init nf_conntrack_proto_dccp_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&dccp_net_ops);
- if (ret < 0)
- goto out_pernet;
-
- ret = nf_ct_l4proto_register(&dccp_proto4);
- if (ret < 0)
- goto out_dccp4;
-
- ret = nf_ct_l4proto_register(&dccp_proto6);
- if (ret < 0)
- goto out_dccp6;
-
- return 0;
-out_dccp6:
- nf_ct_l4proto_unregister(&dccp_proto4);
-out_dccp4:
- unregister_pernet_subsys(&dccp_net_ops);
-out_pernet:
- return ret;
-}
-
-static void __exit nf_conntrack_proto_dccp_fini(void)
-{
- nf_ct_l4proto_unregister(&dccp_proto6);
- nf_ct_l4proto_unregister(&dccp_proto4);
- unregister_pernet_subsys(&dccp_net_ops);
-}
-
-module_init(nf_conntrack_proto_dccp_init);
-module_exit(nf_conntrack_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP connection tracking protocol helper");
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
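
The storage change here repeats for sctp and udplite below: the module-private net_generic() slot, keyed by a dynamically allocated id, becomes a plain member of net->ct.nf_ct_proto, so the accessor drops the id indirection and no pernet registration is needed. Schematically, with "foo" standing in for any of the three:

	/* before: dynamic pernet slot owned by the module */
	static unsigned int foo_net_id __read_mostly;

	static inline struct foo_net *foo_pernet(struct net *net)
	{
		return net_generic(net, foo_net_id);
	}

	/* after: embedded in struct netns_ct */
	static inline struct nf_foo_net *foo_pernet(struct net *net)
	{
		return &net->ct.nf_ct_proto.foo;
	}
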
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9a715f88b2f1..87bb40a3feb5 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -53,7 +53,7 @@ static unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_REPLIED] = 180*HZ,
};
-static int proto_gre_net_id __read_mostly;
+static unsigned int proto_gre_net_id __read_mostly;
struct netns_proto_gre {
struct nf_proto_net nf;
rwlock_t keymap_lock;
@@ -396,7 +396,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
static int proto_gre_net_init(struct net *net)
{
int ret = 0;
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4);
+
+ ret = nf_ct_l4proto_pernet_register_one(net,
+ &nf_conntrack_l4proto_gre4);
if (ret < 0)
pr_err("nf_conntrack_gre4: pernet registration failed.\n");
return ret;
@@ -404,7 +406,7 @@ static int proto_gre_net_init(struct net *net)
static void proto_gre_net_exit(struct net *net)
{
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4);
+ nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4);
nf_ct_gre_keymap_flush(net);
}
@@ -422,8 +424,7 @@ static int __init nf_ct_proto_gre_init(void)
ret = register_pernet_subsys(&proto_gre_net_ops);
if (ret < 0)
goto out_pernet;
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4);
+ ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4);
if (ret < 0)
goto out_gre4;
@@ -436,7 +437,7 @@ out_pernet:
static void __exit nf_ct_proto_gre_fini(void)
{
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+ nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4);
unregister_pernet_subsys(&proto_gre_net_ops);
}
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 982ea62606c7..a0efde38da44 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -15,7 +15,6 @@
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
-#include <linux/module.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/sctp.h>
@@ -144,15 +143,9 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
}
};
-static int sctp_net_id __read_mostly;
-struct sctp_net {
- struct nf_proto_net pn;
- unsigned int timeouts[SCTP_CONNTRACK_MAX];
-};
-
-static inline struct sctp_net *sctp_pernet(struct net *net)
+static inline struct nf_sctp_net *sctp_pernet(struct net *net)
{
- return net_generic(net, sctp_net_id);
+ return &net->ct.nf_ct_proto.sctp;
}
static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -600,7 +593,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
- struct sctp_net *sn = sctp_pernet(net);
+ struct nf_sctp_net *sn = sctp_pernet(net);
int i;
/* set default SCTP timeouts. */
@@ -708,7 +701,7 @@ static struct ctl_table sctp_sysctl_table[] = {
#endif
static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct sctp_net *sn)
+ struct nf_sctp_net *sn)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table)
@@ -735,7 +728,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int sctp_init_net(struct net *net, u_int16_t proto)
{
- struct sctp_net *sn = sctp_pernet(net);
+ struct nf_sctp_net *sn = sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
if (!pn->users) {
@@ -748,7 +741,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
return sctp_kmemdup_sysctl_table(pn, sn);
}
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
.name = "sctp",
@@ -778,11 +771,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.nla_policy = sctp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .net_id = &sctp_net_id,
.init_net = sctp_init_net,
};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
.name = "sctp",
@@ -812,81 +805,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
#endif
- .net_id = &sctp_net_id,
.init_net = sctp_init_net,
};
-
-static int sctp_net_init(struct net *net)
-{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4);
- if (ret < 0) {
- pr_err("nf_conntrack_sctp4: pernet registration failed.\n");
- goto out;
- }
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6);
- if (ret < 0) {
- pr_err("nf_conntrack_sctp6: pernet registration failed.\n");
- goto cleanup_sctp4;
- }
- return 0;
-
-cleanup_sctp4:
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
-out:
- return ret;
-}
-
-static void sctp_net_exit(struct net *net)
-{
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6);
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
-}
-
-static struct pernet_operations sctp_net_ops = {
- .init = sctp_net_init,
- .exit = sctp_net_exit,
- .id = &sctp_net_id,
- .size = sizeof(struct sctp_net),
-};
-
-static int __init nf_conntrack_proto_sctp_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&sctp_net_ops);
- if (ret < 0)
- goto out_pernet;
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4);
- if (ret < 0)
- goto out_sctp4;
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6);
- if (ret < 0)
- goto out_sctp6;
-
- return 0;
-out_sctp6:
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
-out_sctp4:
- unregister_pernet_subsys(&sctp_net_ops);
-out_pernet:
- return ret;
-}
-
-static void __exit nf_conntrack_proto_sctp_fini(void)
-{
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
- unregister_pernet_subsys(&sctp_net_ops);
-}
-
-module_init(nf_conntrack_proto_sctp_init);
-module_exit(nf_conntrack_proto_sctp_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
-MODULE_ALIAS("ip_conntrack_proto_sctp");
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6);
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 029206e8dec4..c35f7bf05d8c 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -9,7 +9,6 @@
#include <linux/types.h>
#include <linux/timer.h>
-#include <linux/module.h>
#include <linux/udp.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
@@ -24,26 +23,14 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_log.h>
-enum udplite_conntrack {
- UDPLITE_CT_UNREPLIED,
- UDPLITE_CT_REPLIED,
- UDPLITE_CT_MAX
-};
-
static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = {
[UDPLITE_CT_UNREPLIED] = 30*HZ,
[UDPLITE_CT_REPLIED] = 180*HZ,
};
-static int udplite_net_id __read_mostly;
-struct udplite_net {
- struct nf_proto_net pn;
- unsigned int timeouts[UDPLITE_CT_MAX];
-};
-
-static inline struct udplite_net *udplite_pernet(struct net *net)
+static inline struct nf_udplite_net *udplite_pernet(struct net *net)
{
- return net_generic(net, udplite_net_id);
+ return &net->ct.nf_ct_proto.udplite;
}
static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
@@ -178,7 +165,7 @@ static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
- struct udplite_net *un = udplite_pernet(net);
+ struct nf_udplite_net *un = udplite_pernet(net);
/* set default timeouts for UDPlite. */
timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED];
@@ -237,7 +224,7 @@ static struct ctl_table udplite_sysctl_table[] = {
#endif /* CONFIG_SYSCTL */
static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct udplite_net *un)
+ struct nf_udplite_net *un)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table)
@@ -257,7 +244,7 @@ static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int udplite_init_net(struct net *net, u_int16_t proto)
{
- struct udplite_net *un = udplite_pernet(net);
+ struct nf_udplite_net *un = udplite_pernet(net);
struct nf_proto_net *pn = &un->pn;
if (!pn->users) {
@@ -270,7 +257,7 @@ static int udplite_init_net(struct net *net, u_int16_t proto)
return udplite_kmemdup_sysctl_table(pn, un);
}
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
+struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
@@ -299,11 +286,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
.nla_policy = udplite_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .net_id = &udplite_net_id,
.init_net = udplite_init_net,
};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
+struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
@@ -332,78 +319,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
.nla_policy = udplite_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .net_id = &udplite_net_id,
.init_net = udplite_init_net,
};
-
-static int udplite_net_init(struct net *net)
-{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4);
- if (ret < 0) {
- pr_err("nf_conntrack_udplite4: pernet registration failed.\n");
- goto out;
- }
- ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6);
- if (ret < 0) {
- pr_err("nf_conntrack_udplite6: pernet registration failed.\n");
- goto cleanup_udplite4;
- }
- return 0;
-
-cleanup_udplite4:
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
-out:
- return ret;
-}
-
-static void udplite_net_exit(struct net *net)
-{
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6);
- nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
-}
-
-static struct pernet_operations udplite_net_ops = {
- .init = udplite_net_init,
- .exit = udplite_net_exit,
- .id = &udplite_net_id,
- .size = sizeof(struct udplite_net),
-};
-
-static int __init nf_conntrack_proto_udplite_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&udplite_net_ops);
- if (ret < 0)
- goto out_pernet;
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4);
- if (ret < 0)
- goto out_udplite4;
-
- ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6);
- if (ret < 0)
- goto out_udplite6;
-
- return 0;
-out_udplite6:
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
-out_udplite4:
- unregister_pernet_subsys(&udplite_net_ops);
-out_pernet:
- return ret;
-}
-
-static void __exit nf_conntrack_proto_udplite_exit(void)
-{
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
- nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
- unregister_pernet_subsys(&udplite_net_ops);
-}
-
-module_init(nf_conntrack_proto_udplite_init);
-module_exit(nf_conntrack_proto_udplite_exit);
-
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5f446cd9f3fd..d009ae663453 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -452,6 +452,9 @@ static int log_invalid_proto_max __read_mostly = 255;
/* size the user *wants to set */
static unsigned int nf_conntrack_htable_size_user __read_mostly;
+extern unsigned int nf_conntrack_default_on;
+unsigned int nf_conntrack_default_on __read_mostly = 1;
+
static int
nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -517,6 +520,13 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "nf_conntrack_default_on",
+ .data = &nf_conntrack_default_on,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 7ec69723940f..c9d7f95768ab 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -14,24 +14,41 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
+{
+ if (skb_mac_header_was_set(skb))
+ skb_push(skb, skb->mac_len);
+
+ skb->dev = dev;
+ dev_queue_xmit(skb);
+}
+
+void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
+{
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(nft_net(pkt), oif);
+ if (!dev) {
+ kfree_skb(pkt->skb);
+ return;
+ }
+
+ nf_do_netdev_egress(pkt->skb, dev);
+}
+EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress);
+
void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
{
struct net_device *dev;
struct sk_buff *skb;
- dev = dev_get_by_index_rcu(pkt->net, oif);
+ dev = dev_get_by_index_rcu(nft_net(pkt), oif);
if (dev == NULL)
return;
skb = skb_clone(pkt->skb, GFP_ATOMIC);
- if (skb == NULL)
- return;
-
- if (skb_mac_header_was_set(skb))
- skb_push(skb, skb->mac_len);
-
- skb->dev = dev;
- dev_queue_xmit(skb);
+ if (skb)
+ nf_do_netdev_egress(skb, dev);
}
EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
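
Factoring out nf_do_netdev_egress() lets the dup path (clone, then transmit) and the new nf_fwd_netdev_egress() (consume, then transmit) share the mac-header push and the dev_queue_xmit() tail. The nft forward expression this helper was added for then reduces to roughly:

	static void nft_fwd_netdev_eval(const struct nft_expr *expr,
					struct nft_regs *regs,
					const struct nft_pktinfo *pkt)
	{
		struct nft_fwd_netdev *priv = nft_expr_priv(expr);
		int oif = regs->data[priv->sreg_dev];

		nf_fwd_netdev_egress(pkt, oif);	/* consumes the skb */
		regs->verdict.code = NF_STOLEN;
	}
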
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 9fdb655f85bc..c46d214d5323 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -11,11 +11,6 @@
#define NFDEBUG(format, args...)
#endif
-
-/* core.c */
-unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp);
-
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
struct nf_hook_entry **entryp, unsigned int verdict);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 3dca90dc24ad..ffb9e8ada899 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -13,7 +13,6 @@
/* Internal logging interface, which relies on the real
LOG target modules */
-#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64
static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index 119fe1cb1ea9..dc61399e30be 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -175,6 +175,34 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
}
EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
+/* bridge and netdev logging families share this code. */
+void nf_log_l2packet(struct net *net, u_int8_t pf,
+ __be16 protocol,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ switch (protocol) {
+ case htons(ETH_P_IP):
+ nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ case htons(ETH_P_IPV6):
+ nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_RARP):
+ nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nf_log_l2packet);
+
static int __init nf_log_common_init(void)
{
return 0;
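
nf_log_l2packet() dispatches purely on the EtherType, so any l2 logging family can delegate to it. The bridge logger, for example, can shrink to a thin wrapper along these lines (sketch):

	static void nf_log_bridge_packet(struct net *net, u_int8_t pf,
					 unsigned int hooknum,
					 const struct sk_buff *skb,
					 const struct net_device *in,
					 const struct net_device *out,
					 const struct nf_loginfo *loginfo,
					 const char *prefix)
	{
		nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum,
				skb, in, out, loginfo, prefix);
	}
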
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
new file mode 100644
index 000000000000..350eb147754d
--- /dev/null
+++ b/net/netfilter/nf_log_netdev.c
@@ -0,0 +1,81 @@
+/*
+ * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_log.h>
+
+static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out,
+ loginfo, prefix);
+}
+
+static struct nf_logger nf_netdev_logger __read_mostly = {
+ .name = "nf_log_netdev",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_netdev_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_netdev_net_init(struct net *net)
+{
+ return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger);
+}
+
+static void __net_exit nf_log_netdev_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_netdev_logger);
+}
+
+static struct pernet_operations nf_log_netdev_net_ops = {
+ .init = nf_log_netdev_net_init,
+ .exit = nf_log_netdev_net_exit,
+};
+
+static int __init nf_log_netdev_init(void)
+{
+ int ret;
+
+ /* Request to load the real packet loggers. */
+ nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG);
+ nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG);
+ nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG);
+
+ ret = register_pernet_subsys(&nf_log_netdev_net_ops);
+ if (ret < 0)
+ return ret;
+
+ nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger);
+ return 0;
+}
+
+static void __exit nf_log_netdev_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_netdev_net_ops);
+ nf_log_unregister(&nf_netdev_logger);
+}
+
+module_init(nf_log_netdev_init);
+module_exit(nf_log_netdev_exit);
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter netdev packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5b9c884a452e..94b14c5a8b17 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -682,6 +682,18 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
&nf_nat_l4proto_tcp);
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
&nf_nat_l4proto_udp);
+#ifdef CONFIG_NF_NAT_PROTO_DCCP
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
+ &nf_nat_l4proto_dccp);
+#endif
+#ifdef CONFIG_NF_NAT_PROTO_SCTP
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
+ &nf_nat_l4proto_sctp);
+#endif
+#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE],
+ &nf_nat_l4proto_udplite);
+#endif
mutex_unlock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 15c47b246d0d..269fcd5dc34c 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -10,8 +10,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/dccp.h>
@@ -73,7 +71,7 @@ dccp_manip_pkt(struct sk_buff *skb,
return true;
}
-static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
+const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.l4proto = IPPROTO_DCCP,
.manip_pkt = dccp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
@@ -82,35 +80,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
-
-static int __init nf_nat_proto_dccp_init(void)
-{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
- if (err < 0)
- goto err1;
- err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
- if (err < 0)
- goto err2;
- return 0;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-err1:
- return err;
-}
-
-static void __exit nf_nat_proto_dccp_fini(void)
-{
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-
-}
-
-module_init(nf_nat_proto_dccp_init);
-module_exit(nf_nat_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP NAT protocol helper");
-MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index cbc7ade1487b..31d358691af0 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -7,9 +7,7 @@
*/
#include <linux/types.h>
-#include <linux/init.h>
#include <linux/sctp.h>
-#include <linux/module.h>
#include <net/sctp/checksum.h>
#include <net/netfilter/nf_nat_l4proto.h>
@@ -49,12 +47,15 @@ sctp_manip_pkt(struct sk_buff *skb,
hdr->dest = tuple->dst.u.sctp.port;
}
- hdr->checksum = sctp_compute_cksum(skb, hdroff);
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ hdr->checksum = sctp_compute_cksum(skb, hdroff);
+ skb->ip_summed = CHECKSUM_NONE;
+ }
return true;
}
-static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
+const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.l4proto = IPPROTO_SCTP,
.manip_pkt = sctp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
@@ -63,34 +64,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
-
-static int __init nf_nat_proto_sctp_init(void)
-{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
- if (err < 0)
- goto err1;
- err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
- if (err < 0)
- goto err2;
- return 0;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-err1:
- return err;
-}
-
-static void __exit nf_nat_proto_sctp_exit(void)
-{
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-}
-
-module_init(nf_nat_proto_sctp_init);
-module_exit(nf_nat_proto_sctp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SCTP NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
index 58340c97bd83..366bfbfd82a1 100644
--- a/net/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -8,11 +8,9 @@
*/
#include <linux/types.h>
-#include <linux/init.h>
#include <linux/udp.h>
#include <linux/netfilter.h>
-#include <linux/module.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
@@ -64,7 +62,7 @@ udplite_manip_pkt(struct sk_buff *skb,
return true;
}
-static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
+const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.l4proto = IPPROTO_UDPLITE,
.manip_pkt = udplite_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
@@ -73,34 +71,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
-
-static int __init nf_nat_proto_udplite_init(void)
-{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
- if (err < 0)
- goto err1;
- err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
- if (err < 0)
- goto err2;
- return 0;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
-err1:
- return err;
-}
-
-static void __exit nf_nat_proto_udplite_fini(void)
-{
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
-}
-
-module_init(nf_nat_proto_udplite_init);
-module_exit(nf_nat_proto_udplite_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 8f08d759844a..4a7662486f44 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
}
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
- unsigned int queuenum)
+ struct nf_hook_entry *hook_entry, unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -136,6 +136,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
*entry = (struct nf_queue_entry) {
.skb = skb,
.state = *state,
+ .hook = hook_entry,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -163,8 +164,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
struct nf_hook_entry *entry = *entryp;
int ret;
- RCU_INIT_POINTER(state->hook_entries, entry);
- ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
+ ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
@@ -177,22 +177,38 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
return 0;
}
+static unsigned int nf_iterate(struct sk_buff *skb,
+ struct nf_hook_state *state,
+ struct nf_hook_entry **entryp)
+{
+ unsigned int verdict;
+
+ do {
+repeat:
+ verdict = nf_hook_entry_hookfn((*entryp), skb, state);
+ if (verdict != NF_ACCEPT) {
+ if (verdict != NF_REPEAT)
+ return verdict;
+ goto repeat;
+ }
+ *entryp = rcu_dereference((*entryp)->next);
+ } while (*entryp);
+
+ return NF_ACCEPT;
+}
+
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
- struct nf_hook_entry *hook_entry;
+ struct nf_hook_entry *hook_entry = entry->hook;
struct sk_buff *skb = entry->skb;
const struct nf_afinfo *afinfo;
- struct nf_hook_ops *elem;
int err;
- hook_entry = rcu_dereference(entry->state.hook_entries);
- elem = &hook_entry->ops;
-
nf_queue_entry_release_refs(entry);
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT)
- verdict = elem->hook(elem->priv, skb, &entry->state);
+ verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
if (verdict == NF_ACCEPT) {
afinfo = nf_get_afinfo(entry->state.pf);
@@ -200,8 +216,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
verdict = NF_DROP;
}
- entry->state.thresh = INT_MIN;
-
if (verdict == NF_ACCEPT) {
hook_entry = rcu_dereference(hook_entry->next);
if (hook_entry)
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index c8a4a48bced9..7c6d1fbe38b9 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -24,7 +24,7 @@
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_conntrack_zones.h>
-int synproxy_net_id;
+unsigned int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
bool
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e5194f6f906c..1b913760f205 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -22,6 +22,7 @@
#include <net/sock.h>
static LIST_HEAD(nf_tables_expressions);
+static LIST_HEAD(nf_tables_objects);
/**
* nft_register_afinfo - register nf_tables address family info
@@ -110,12 +111,12 @@ static void nft_ctx_init(struct nft_ctx *ctx,
ctx->seq = nlh->nlmsg_seq;
}
-static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
- u32 size)
+static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
+ int msg_type, u32 size, gfp_t gfp)
{
struct nft_trans *trans;
- trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+ trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
if (trans == NULL)
return NULL;
@@ -125,6 +126,12 @@ static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
return trans;
}
+static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
+ int msg_type, u32 size)
+{
+ return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
+}
+
static void nft_trans_destroy(struct nft_trans *trans)
{
list_del(&trans->list);
@@ -304,6 +311,38 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
return err;
}
+static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_object *obj)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWOBJ)
+ nft_activate_next(ctx->net, obj);
+
+ nft_trans_obj(trans) = obj;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
+{
+ int err;
+
+ err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj);
+ if (err < 0)
+ return err;
+
+ nft_deactivate_next(ctx->net, obj);
+ ctx->table->use--;
+
+ return err;
+}
+
/*
* Tables
*/
@@ -688,6 +727,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
+ INIT_LIST_HEAD(&table->objects);
table->flags = flags;
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
@@ -709,6 +749,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
{
int err;
struct nft_chain *chain, *nc;
+ struct nft_object *obj, *ne;
struct nft_set *set, *ns;
list_for_each_entry(chain, &ctx->table->chains, list) {
@@ -735,6 +776,12 @@ static int nft_flush_table(struct nft_ctx *ctx)
goto out;
}
+ list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
+ err = nft_delobj(ctx, obj);
+ if (err < 0)
+ goto out;
+ }
+
list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
if (!nft_is_active_next(ctx->net, chain))
continue;
@@ -881,7 +928,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
}
static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
- [NFTA_CHAIN_TABLE] = { .type = NLA_STRING },
+ [NFTA_CHAIN_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_CHAIN_HANDLE] = { .type = NLA_U64 },
[NFTA_CHAIN_NAME] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
@@ -1807,7 +1855,8 @@ static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
}
static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
- [NFTA_RULE_TABLE] = { .type = NLA_STRING },
+ [NFTA_RULE_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_RULE_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_RULE_HANDLE] = { .type = NLA_U64 },
@@ -2068,7 +2117,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
* is called on error from nf_tables_newrule().
*/
expr = nft_expr_first(rule);
- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (expr != nft_expr_last(rule) && expr->ops) {
nf_tables_expr_destroy(ctx, expr);
expr = nft_expr_next(expr);
}
@@ -2396,7 +2445,8 @@ nft_select_set_ops(const struct nlattr * const nla[],
}
static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
- [NFTA_SET_TABLE] = { .type = NLA_STRING },
+ [NFTA_SET_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_NAME] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_SET_FLAGS] = { .type = NLA_U32 },
@@ -2411,6 +2461,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
[NFTA_SET_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
+ [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2462,6 +2513,7 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
}
return ERR_PTR(-ENOENT);
}
+EXPORT_SYMBOL_GPL(nf_tables_set_lookup);
struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
const struct nlattr *nla,
@@ -2480,6 +2532,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
}
return ERR_PTR(-ENOENT);
}
+EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid);
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
@@ -2568,6 +2621,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen)))
goto nla_put_failure;
}
+ if (set->flags & NFT_SET_OBJECT &&
+ nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
+ goto nla_put_failure;
if (set->timeout &&
nla_put_be64(skb, NFTA_SET_TIMEOUT,
@@ -2797,7 +2853,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
unsigned int size;
bool create;
u64 timeout;
- u32 ktype, dtype, flags, policy, gc_int;
+ u32 ktype, dtype, flags, policy, gc_int, objtype;
struct nft_set_desc desc;
unsigned char *udata;
u16 udlen;
@@ -2827,11 +2883,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
- NFT_SET_MAP | NFT_SET_EVAL))
+ NFT_SET_MAP | NFT_SET_EVAL |
+ NFT_SET_OBJECT))
return -EINVAL;
- /* Only one of both operations is supported */
- if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) ==
- (NFT_SET_MAP | NFT_SET_EVAL))
+ /* Only one of these operations is supported */
+ if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ==
+ (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT))
return -EOPNOTSUPP;
}
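/*
 * As written, the combined-flags check fires only when all three of
 * NFT_SET_MAP, NFT_SET_EVAL and NFT_SET_OBJECT are set at once; any
 * two of them still pass. An "at most one" test would look roughly
 * like this sketch (hweight32() from <linux/bitops.h>):
 *
 *	if (hweight32(flags & (NFT_SET_MAP | NFT_SET_EVAL |
 *			       NFT_SET_OBJECT)) > 1)
 *		return -EOPNOTSUPP;
 */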
@@ -2856,6 +2913,19 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ if (nla[NFTA_SET_OBJ_TYPE] != NULL) {
+ if (!(flags & NFT_SET_OBJECT))
+ return -EINVAL;
+
+ objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+ if (objtype == NFT_OBJECT_UNSPEC ||
+ objtype > NFT_OBJECT_MAX)
+ return -EINVAL;
+ } else if (flags & NFT_SET_OBJECT)
+ return -EINVAL;
+ else
+ objtype = NFT_OBJECT_UNSPEC;
+
timeout = 0;
if (nla[NFTA_SET_TIMEOUT] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
@@ -2943,6 +3013,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
set->ktype = ktype;
set->klen = desc.klen;
set->dtype = dtype;
+ set->objtype = objtype;
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
@@ -3016,9 +3087,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
}
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
const struct nft_set_iter *iter,
- const struct nft_set_elem *elem)
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
enum nft_registers dreg;
@@ -3064,6 +3135,7 @@ bind:
list_add_tail_rcu(&binding->list, &set->bindings);
return 0;
}
+EXPORT_SYMBOL_GPL(nf_tables_bind_set);
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
@@ -3074,6 +3146,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
nft_is_active(ctx->net, set))
nf_tables_set_destroy(ctx, set);
}
+EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
const struct nft_set_ext_type nft_set_ext_types[] = {
[NFT_SET_EXT_KEY] = {
@@ -3085,6 +3158,10 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
[NFT_SET_EXT_EXPR] = {
.align = __alignof__(struct nft_expr),
},
+ [NFT_SET_EXT_OBJREF] = {
+ .len = sizeof(struct nft_object *),
+ .align = __alignof__(struct nft_object *),
+ },
[NFT_SET_EXT_FLAGS] = {
.len = sizeof(u8),
.align = __alignof__(u8),
@@ -3118,8 +3195,10 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
- [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
- [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
+ [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
@@ -3173,6 +3252,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0)
goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
+ nla_put_string(skb, NFTA_SET_ELEM_OBJREF,
+ (*nft_set_ext_obj(ext))->name) < 0)
+ goto nla_put_failure;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
htonl(*nft_set_ext_flags(ext))))
@@ -3224,9 +3308,9 @@ struct nft_set_dump_args {
};
static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
const struct nft_set_iter *iter,
- const struct nft_set_elem *elem)
+ struct nft_set_elem *elem)
{
struct nft_set_dump_args *args;
@@ -3238,7 +3322,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
u8 genmask = nft_genmask_cur(net);
- const struct nft_set *set;
+ struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
@@ -3467,7 +3551,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
nft_data_uninit(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
-
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+ (*nft_set_ext_obj(ext))->use--;
kfree(elem);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -3492,11 +3577,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ u8 genmask = nft_genmask_next(ctx->net);
struct nft_data_desc d1, d2;
struct nft_set_ext_tmpl tmpl;
struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_object *obj = NULL;
struct nft_userdata *udata;
struct nft_data data;
enum nft_registers dreg;
@@ -3559,6 +3646,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
}
+ if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
+ if (!(set->flags & NFT_SET_OBJECT)) {
+ err = -EINVAL;
+ goto err2;
+ }
+ obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
+ set->objtype, genmask);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err2;
+ }
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ }
+
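/*
 * An element's object reference is resolved against the set's declared
 * objtype, and the object is pinned via obj->use once the extension is
 * filled in (the "if (obj)" block below); nft_set_elem_destroy() drops
 * that reference again, so object lifetime covers element lifetime.
 */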
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
err = nft_data_init(ctx, &data, sizeof(data), &d2,
nla[NFTA_SET_ELEM_DATA]);
@@ -3617,6 +3718,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
udata->len = ulen - 1;
nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
}
+ if (obj) {
+ *nft_set_ext_obj(ext) = obj;
+ obj->use++;
+ }
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
@@ -3626,10 +3731,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = set->ops->insert(ctx->net, set, &elem, &ext2);
if (err) {
if (err == -EEXIST) {
- if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
- nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
- memcmp(nft_set_ext_data(ext),
- nft_set_ext_data(ext2), set->dlen) != 0)
+ if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
+ memcmp(nft_set_ext_data(ext),
+ nft_set_ext_data(ext2), set->dlen) != 0) ||
+ (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) &&
+ *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2)))
err = -EBUSY;
else if (!(nlmsg_flags & NLM_F_EXCL))
err = 0;
@@ -3637,10 +3745,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err5;
}
+ if (set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
+ err = -ENFILE;
+ goto err6;
+ }
+
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err6:
+ set->ops->remove(set, &elem);
err5:
kfree(trans);
err4:
@@ -3687,15 +3803,9 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- if (set->size &&
- !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
- return -ENFILE;
-
err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
- if (err < 0) {
- atomic_dec(&set->nelems);
+ if (err < 0)
break;
- }
}
return err;
}
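/*
 * The size accounting moved from this loop into nft_add_set_elem()
 * itself: the slot in set->nelems is reserved only after a successful
 * ->insert(), so a failed insertion no longer needs a compensating
 * atomic_dec(). The atomic_add_unless() bound of size + ndeact keeps
 * room for elements deactivated, but not yet committed, in the same
 * transaction.
 */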
@@ -3779,6 +3889,35 @@ err1:
return err;
}
+static int nft_flush_set(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ struct nft_trans *trans;
+ int err;
+
+ trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
+ sizeof(struct nft_trans_elem), GFP_ATOMIC);
+ if (!trans)
+ return -ENOMEM;
+
+ if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) {
+ err = -ENOENT;
+ goto err1;
+ }
+ set->ndeact++;
+
+ nft_trans_elem_set(trans) = set;
+ nft_trans_elem(trans) = *elem;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+err1:
+ kfree(trans);
+ return err;
+}
+
static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -3789,9 +3928,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct nft_ctx ctx;
int rem, err = 0;
- if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
- return -EINVAL;
-
err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
@@ -3803,6 +3939,18 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
+ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
+ struct nft_set_dump_args args = {
+ .iter = {
+ .genmask = genmask,
+ .fn = nft_flush_set,
+ },
+ };
+ set->ops->walk(&ctx, set, &args.iter);
+
+ return args.iter.err;
+ }
+
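/*
 * The flush path reuses struct nft_set_dump_args purely as a container
 * for the embedded iterator; a minimal stand-alone equivalent would be
 * (sketch, same semantics):
 *
 *	struct nft_set_iter iter = {
 *		.genmask	= genmask,
 *		.fn		= nft_flush_set,
 *	};
 *	set->ops->walk(&ctx, set, &iter);
 *	return iter.err;
 */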
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
if (err < 0)
@@ -3838,6 +3986,503 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
+/*
+ * Stateful objects
+ */
+
+/**
+ * nft_register_obj - register nf_tables stateful object type
+ * @obj_type: object type
+ *
+ * Registers the object type for use with nf_tables. Returns zero on
+ * success or a negative errno code otherwise.
+ */
+int nft_register_obj(struct nft_object_type *obj_type)
+{
+ if (obj_type->type == NFT_OBJECT_UNSPEC)
+ return -EINVAL;
+
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_rcu(&obj_type->list, &nf_tables_objects);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_obj);
+
+/**
+ * nft_unregister_obj - unregister nf_tables object type
+ * @obj_type: object type
+ *
+ * Unregisters the object type for use with nf_tables.
+ */
+void nft_unregister_obj(struct nft_object_type *obj_type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del_rcu(&obj_type->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_obj);
+
+struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
+ const struct nlattr *nla,
+ u32 objtype, u8 genmask)
+{
+ struct nft_object *obj;
+
+ list_for_each_entry(obj, &table->objects, list) {
+ if (!nla_strcmp(nla, obj->name) &&
+ objtype == obj->type->type &&
+ nft_active_genmask(obj, genmask))
+ return obj;
+ }
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
+
+static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
+ [NFTA_OBJ_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_OBJ_NAME] = { .type = NLA_STRING,
+ .len = NFT_OBJ_MAXNAMELEN - 1 },
+ [NFTA_OBJ_TYPE] = { .type = NLA_U32 },
+ [NFTA_OBJ_DATA] = { .type = NLA_NESTED },
+};
+
+static struct nft_object *nft_obj_init(const struct nft_object_type *type,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[type->maxattr + 1];
+ struct nft_object *obj;
+ int err;
+
+ if (attr) {
+ err = nla_parse_nested(tb, type->maxattr, attr, type->policy);
+ if (err < 0)
+ goto err1;
+ } else {
+ memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
+ }
+
+ err = -ENOMEM;
+ obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL);
+ if (obj == NULL)
+ goto err1;
+
+ err = type->init((const struct nlattr * const *)tb, obj);
+ if (err < 0)
+ goto err2;
+
+ obj->type = type;
+ return obj;
+err2:
+ kfree(obj);
+err1:
+ return ERR_PTR(err);
+}
+
+static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
+ struct nft_object *obj, bool reset)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, attr);
+ if (!nest)
+ goto nla_put_failure;
+ if (obj->type->dump(skb, obj, reset) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
+{
+ const struct nft_object_type *type;
+
+ list_for_each_entry(type, &nf_tables_objects, list) {
+ if (objtype == type->type)
+ return type;
+ }
+ return NULL;
+}
+
+static const struct nft_object_type *nft_obj_type_get(u32 objtype)
+{
+ const struct nft_object_type *type;
+
+ type = __nft_obj_type_get(objtype);
+ if (type != NULL && try_module_get(type->owner))
+ return type;
+
+#ifdef CONFIG_MODULES
+ if (type == NULL) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-obj-%u", objtype);
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (__nft_obj_type_get(objtype))
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
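/*
 * Returning -EAGAIN after request_module() makes nfnetlink abort and
 * replay the whole batch once the backend module is loaded. For the
 * "nft-obj-%u" alias to resolve, each object module advertises itself,
 * as nft_counter does at the end of this patch:
 *
 *	MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER);
 */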
+static int nf_tables_newobj(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_object_type *type;
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_object *obj;
+ struct nft_ctx ctx;
+ u32 objtype;
+ int err;
+
+ if (!nla[NFTA_OBJ_TYPE] ||
+ !nla[NFTA_OBJ_NAME] ||
+ !nla[NFTA_OBJ_DATA])
+ return -EINVAL;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+ obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ if (err != -ENOENT)
+ return err;
+
+ obj = NULL;
+ }
+
+ if (obj != NULL) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ return 0;
+ }
+
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+
+ type = nft_obj_type_get(objtype);
+ if (IS_ERR(type))
+ return PTR_ERR(type);
+
+ obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err1;
+ }
+ obj->table = table;
+ nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+
+ err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
+ if (err < 0)
+ goto err2;
+
+ list_add_tail_rcu(&obj->list, &table->objects);
+ table->use++;
+ return 0;
+err2:
+ if (obj->type->destroy)
+ obj->type->destroy(obj);
+ kfree(obj);
+err1:
+ module_put(type->owner);
+ return err;
+}
+
+static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table,
+ struct nft_object *obj, bool reset)
+{
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
+
+ if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
+ nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
+ nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
+ nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
+ nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+struct nft_obj_filter {
+ char table[NFT_TABLE_MAXNAMELEN];
+ u32 type;
+};
+
+static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct nft_obj_filter *filter = cb->data;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ struct nft_object *obj;
+ bool reset = false;
+
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ reset = true;
+
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+ if (family != NFPROTO_UNSPEC && family != afi->family)
+ continue;
+
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(obj, &table->objects, list) {
+ if (!nft_is_active(net, obj))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter && filter->table[0] &&
+ strcmp(filter->table, table->name))
+ goto cont;
+ if (filter &&
+ filter->type != NFT_OBJECT_UNSPEC &&
+ obj->type->type != filter->type)
+ goto cont;
+
+ if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWOBJ,
+ NLM_F_MULTI | NLM_F_APPEND,
+ afi->family, table, obj, reset) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+
+ return 0;
+}
+
+static struct nft_obj_filter *
+nft_obj_filter_alloc(const struct nlattr * const nla[])
+{
+ struct nft_obj_filter *filter;
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return ERR_PTR(-ENOMEM);
+
+ if (nla[NFTA_OBJ_TABLE])
+ nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
+ NFT_TABLE_MAXNAMELEN);
+ if (nla[NFTA_OBJ_TYPE])
+ filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+
+ return filter;
+}
+
+static int nf_tables_getobj(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
+ int family = nfmsg->nfgen_family;
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ struct nft_object *obj;
+ struct sk_buff *skb2;
+ bool reset = false;
+ u32 objtype;
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_obj,
+ .done = nf_tables_dump_obj_done,
+ };
+
+ if (nla[NFTA_OBJ_TABLE] ||
+ nla[NFTA_OBJ_TYPE]) {
+ struct nft_obj_filter *filter;
+
+ filter = nft_obj_filter_alloc(nla);
+ if (IS_ERR(filter))
+ return -ENOMEM;
+
+ c.data = filter;
+ }
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ if (!nla[NFTA_OBJ_NAME] ||
+ !nla[NFTA_OBJ_TYPE])
+ return -EINVAL;
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+ obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ reset = true;
+
+ err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+ family, table, obj, reset);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
+static void nft_obj_destroy(struct nft_object *obj)
+{
+ if (obj->type->destroy)
+ obj->type->destroy(obj);
+
+ module_put(obj->type->owner);
+ kfree(obj);
+}
+
+static int nf_tables_delobj(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_object *obj;
+ struct nft_ctx ctx;
+ u32 objtype;
+
+ if (!nla[NFTA_OBJ_TYPE] ||
+ !nla[NFTA_OBJ_NAME])
+ return -EINVAL;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+ obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+ if (obj->use > 0)
+ return -EBUSY;
+
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+
+ return nft_delobj(&ctx, obj);
+}
+
+int nft_obj_notify(struct net *net, struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ int family, int report, gfp_t gfp)
+{
+ struct sk_buff *skb;
+ int err;
+
+ if (!report &&
+ !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, gfp);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family,
+ table, obj, false);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp);
+err:
+ if (err < 0) {
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(nft_obj_notify);
+
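/*
 * nft_obj_notify() is exported so object backends can emit events on
 * their own, outside of a transaction. A hypothetical backend holding
 * the table and object pointers could report a state change like this
 * (sketch; the family and the zero portid/seq are assumptions):
 *
 *	nft_obj_notify(net, obj->table, obj, 0, 0, NFT_MSG_NEWOBJ,
 *		       NFPROTO_INET, 0, GFP_ATOMIC);
 */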
+static int nf_tables_obj_notify(const struct nft_ctx *ctx,
+ struct nft_object *obj, int event)
+{
+ return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
+ ctx->seq, event, ctx->afi->family, ctx->report,
+ GFP_KERNEL);
+}
+
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
@@ -3998,6 +4643,26 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_GETGEN] = {
.call = nf_tables_getgen,
},
+ [NFT_MSG_NEWOBJ] = {
+ .call_batch = nf_tables_newobj,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
+ [NFT_MSG_GETOBJ] = {
+ .call = nf_tables_getobj,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
+ [NFT_MSG_DELOBJ] = {
+ .call_batch = nf_tables_delobj,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
+ [NFT_MSG_GETOBJ_RESET] = {
+ .call = nf_tables_getobj,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
};
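/*
 * NFT_MSG_GETOBJ and NFT_MSG_GETOBJ_RESET deliberately share the same
 * handler: nf_tables_getobj() checks NFNL_MSG_TYPE(nlh->nlmsg_type)
 * and only flips the "reset" flag for the _RESET variant, both for
 * single lookups and for dumps.
 */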
static void nft_chain_commit_update(struct nft_trans *trans)
@@ -4040,6 +4705,9 @@ static void nf_tables_commit_release(struct nft_trans *trans)
nft_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem(trans).priv, true);
break;
+ case NFT_MSG_DELOBJ:
+ nft_obj_destroy(nft_trans_obj(trans));
+ break;
}
kfree(trans);
}
@@ -4147,6 +4815,17 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
atomic_dec(&te->set->nelems);
te->set->ndeact--;
break;
+ case NFT_MSG_NEWOBJ:
+ nft_clear(net, nft_trans_obj(trans));
+ nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
+ NFT_MSG_NEWOBJ);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELOBJ:
+ list_del_rcu(&nft_trans_obj(trans)->list);
+ nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
+ NFT_MSG_DELOBJ);
+ break;
}
}
@@ -4181,6 +4860,9 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem(trans).priv, true);
break;
+ case NFT_MSG_NEWOBJ:
+ nft_obj_destroy(nft_trans_obj(trans));
+ break;
}
kfree(trans);
}
@@ -4261,6 +4943,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
+ case NFT_MSG_NEWOBJ:
+ trans->ctx.table->use--;
+ list_del_rcu(&nft_trans_obj(trans)->list);
+ break;
+ case NFT_MSG_DELOBJ:
+ trans->ctx.table->use++;
+ nft_clear(trans->ctx.net, nft_trans_obj(trans));
+ nft_trans_destroy(trans);
+ break;
}
}
@@ -4329,9 +5020,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
const struct nft_chain *chain);
static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
const struct nft_set_iter *iter,
- const struct nft_set_elem *elem)
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
const struct nft_data *data;
@@ -4355,7 +5046,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
{
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
- const struct nft_set *set;
+ struct nft_set *set;
struct nft_set_binding *binding;
struct nft_set_iter iter;
@@ -4807,6 +5498,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
{
struct nft_table *table, *nt;
struct nft_chain *chain, *nc;
+ struct nft_object *obj, *ne;
struct nft_rule *rule, *nr;
struct nft_set *set, *ns;
struct nft_ctx ctx = {
@@ -4833,6 +5525,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
table->use--;
nft_set_destroy(set);
}
+ list_for_each_entry_safe(obj, ne, &table->objects, list) {
+ list_del(&obj->list);
+ table->use--;
+ nft_obj_destroy(obj);
+ }
list_for_each_entry_safe(chain, nc, &table->chains, list) {
list_del(&chain->list);
table->use--;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 0dd5c695482f..65dbeadcb118 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -53,10 +53,10 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
nft_trace_notify(info);
- nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
- pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
- chain->table->name, chain->name, comments[type],
- rulenum);
+ nf_log_trace(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
+ nft_in(pkt), nft_out(pkt), &trace_loginfo,
+ "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type], rulenum);
}
static inline void nft_trace_packet(struct nft_traceinfo *info,
@@ -124,7 +124,7 @@ unsigned int
nft_do_chain(struct nft_pktinfo *pkt, void *priv)
{
const struct nft_chain *chain = priv, *basechain = chain;
- const struct net *net = pkt->net;
+ const struct net *net = nft_net(pkt);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_regs regs;
@@ -178,6 +178,7 @@ next_rule:
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
+ case NF_STOLEN:
nft_trace_packet(&info, chain, rule,
rulenum, NFT_TRACETYPE_RULE);
return regs.verdict.code;
@@ -231,68 +232,40 @@ next_rule:
}
EXPORT_SYMBOL_GPL(nft_do_chain);
+static struct nft_expr_type *nft_basic_types[] = {
+ &nft_imm_type,
+ &nft_cmp_type,
+ &nft_lookup_type,
+ &nft_bitwise_type,
+ &nft_byteorder_type,
+ &nft_payload_type,
+ &nft_dynset_type,
+ &nft_range_type,
+};
+
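/*
 * Built-in expressions are now table-driven: adding one means making
 * its nft_expr_type non-static (as done for nft_bitwise et al. below),
 * declaring it in net/netfilter/nf_tables_core.h and appending a
 * pointer here, e.g. for a hypothetical type:
 *
 *	&nft_foo_type,
 */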
int __init nf_tables_core_module_init(void)
{
- int err;
-
- err = nft_immediate_module_init();
- if (err < 0)
- goto err1;
-
- err = nft_cmp_module_init();
- if (err < 0)
- goto err2;
-
- err = nft_lookup_module_init();
- if (err < 0)
- goto err3;
-
- err = nft_bitwise_module_init();
- if (err < 0)
- goto err4;
+ int err, i;
- err = nft_byteorder_module_init();
- if (err < 0)
- goto err5;
-
- err = nft_payload_module_init();
- if (err < 0)
- goto err6;
-
- err = nft_dynset_module_init();
- if (err < 0)
- goto err7;
-
- err = nft_range_module_init();
- if (err < 0)
- goto err8;
+ for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) {
+ err = nft_register_expr(nft_basic_types[i]);
+ if (err)
+ goto err;
+ }
return 0;
-err8:
- nft_dynset_module_exit();
-err7:
- nft_payload_module_exit();
-err6:
- nft_byteorder_module_exit();
-err5:
- nft_bitwise_module_exit();
-err4:
- nft_lookup_module_exit();
-err3:
- nft_cmp_module_exit();
-err2:
- nft_immediate_module_exit();
-err1:
+
+err:
+ while (i-- > 0)
+ nft_unregister_expr(nft_basic_types[i]);
return err;
}
void nf_tables_core_module_exit(void)
{
- nft_dynset_module_exit();
- nft_payload_module_exit();
- nft_byteorder_module_exit();
- nft_bitwise_module_exit();
- nft_lookup_module_exit();
- nft_cmp_module_exit();
- nft_immediate_module_exit();
+ int i;
+
+ i = ARRAY_SIZE(nft_basic_types);
+ while (i-- > 0)
+ nft_unregister_expr(nft_basic_types[i]);
}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index ab695f8e2d29..12eb9041dca2 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -171,7 +171,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
unsigned int size;
int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
- if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE))
+ if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
return;
size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
@@ -207,7 +207,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf)))
+ if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
@@ -249,7 +249,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
goto nla_put_failure;
if (!info->packet_dumped) {
- if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out))
+ if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt)))
goto nla_put_failure;
if (nf_trace_fill_pkt_info(skb, pkt))
@@ -258,7 +258,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
}
nlmsg_end(skb, nlh);
- nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
+ nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
return;
nla_put_failure:
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 2278d9ab723b..a09fa9fd8f3d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -22,7 +22,7 @@
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/sock.h>
#include <linux/init.h>
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index eb086a192c5a..08247bf7d7b8 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -80,7 +80,7 @@ struct nfulnl_instance {
#define INSTANCE_BUCKETS 16
-static int nfnl_log_net_id __read_mostly;
+static unsigned int nfnl_log_net_id __read_mostly;
struct nfnl_log_net {
spinlock_t instances_lock;
@@ -330,7 +330,7 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
- skb = alloc_skb(n, GFP_ATOMIC);
+ skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
@@ -538,7 +538,7 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
}
- if (skb->tstamp.tv64) {
+ if (skb->tstamp) {
struct nfulnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
@@ -1152,6 +1152,7 @@ MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
+MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */
module_init(nfnetlink_log_init);
module_exit(nfnetlink_log_fini);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index af832c526048..3ee0b8a000a4 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -69,7 +69,7 @@ struct nfqnl_instance {
* Following fields are dirtied for each queued packet,
* keep them in same cache line if possible.
*/
- spinlock_t lock;
+ spinlock_t lock ____cacheline_aligned_in_smp;
unsigned int queue_total;
unsigned int id_sequence; /* 'sequence' of pkt ids */
struct list_head queue_list; /* packets in queue */
@@ -77,7 +77,7 @@ struct nfqnl_instance {
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
-static int nfnl_queue_net_id __read_mostly;
+static unsigned int nfnl_queue_net_id __read_mostly;
#define INSTANCE_BUCKETS 16
struct nfnl_queue_net {
@@ -384,7 +384,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ nla_total_size(sizeof(u_int32_t)) /* skbinfo */
+ nla_total_size(sizeof(u_int32_t)); /* cap_len */
- if (entskb->tstamp.tv64)
+ if (entskb->tstamp)
size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
size += nfqnl_get_bridge_size(entry);
@@ -555,7 +555,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (nfqnl_put_bridge(entry, skb) < 0)
goto nla_put_failure;
- if (entskb->tstamp.tv64) {
+ if (entskb->tstamp) {
struct nfqnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
@@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = {
static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
{
- return rcu_access_pointer(entry->state.hook_entries) ==
+ return rcu_access_pointer(entry->hook) ==
(struct nf_hook_entry *)entry_ptr;
}
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 31c15ed2e5fc..877d9acd91ef 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -121,7 +121,6 @@ nla_put_failure:
return -1;
}
-static struct nft_expr_type nft_bitwise_type;
static const struct nft_expr_ops nft_bitwise_ops = {
.type = &nft_bitwise_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
@@ -130,20 +129,10 @@ static const struct nft_expr_ops nft_bitwise_ops = {
.dump = nft_bitwise_dump,
};
-static struct nft_expr_type nft_bitwise_type __read_mostly = {
+struct nft_expr_type nft_bitwise_type __read_mostly = {
.name = "bitwise",
.ops = &nft_bitwise_ops,
.policy = nft_bitwise_policy,
.maxattr = NFTA_BITWISE_MAX,
.owner = THIS_MODULE,
};
-
-int __init nft_bitwise_module_init(void)
-{
- return nft_register_expr(&nft_bitwise_type);
-}
-
-void nft_bitwise_module_exit(void)
-{
- nft_unregister_expr(&nft_bitwise_type);
-}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index ee63d981268d..13d4e421a6b3 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -169,7 +169,6 @@ nla_put_failure:
return -1;
}
-static struct nft_expr_type nft_byteorder_type;
static const struct nft_expr_ops nft_byteorder_ops = {
.type = &nft_byteorder_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
@@ -178,20 +177,10 @@ static const struct nft_expr_ops nft_byteorder_ops = {
.dump = nft_byteorder_dump,
};
-static struct nft_expr_type nft_byteorder_type __read_mostly = {
+struct nft_expr_type nft_byteorder_type __read_mostly = {
.name = "byteorder",
.ops = &nft_byteorder_ops,
.policy = nft_byteorder_policy,
.maxattr = NFTA_BYTEORDER_MAX,
.owner = THIS_MODULE,
};
-
-int __init nft_byteorder_module_init(void)
-{
- return nft_register_expr(&nft_byteorder_type);
-}
-
-void nft_byteorder_module_exit(void)
-{
- nft_unregister_expr(&nft_byteorder_type);
-}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 2e53739812b1..2b96effeadc1 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -84,9 +84,6 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- if (desc.len > U8_MAX)
- return -ERANGE;
-
priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
priv->len = desc.len;
return 0;
@@ -110,7 +107,6 @@ nla_put_failure:
return -1;
}
-static struct nft_expr_type nft_cmp_type;
static const struct nft_expr_ops nft_cmp_ops = {
.type = &nft_cmp_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
@@ -211,20 +207,10 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
return &nft_cmp_ops;
}
-static struct nft_expr_type nft_cmp_type __read_mostly = {
+struct nft_expr_type nft_cmp_type __read_mostly = {
.name = "cmp",
.select_ops = nft_cmp_select_ops,
.policy = nft_cmp_policy,
.maxattr = NFTA_CMP_MAX,
.owner = THIS_MODULE,
};
-
-int __init nft_cmp_module_init(void)
-{
- return nft_register_expr(&nft_cmp_type);
-}
-
-void nft_cmp_module_exit(void)
-{
- nft_unregister_expr(&nft_cmp_type);
-}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 77db8358ab14..7f8422213341 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -18,105 +18,197 @@
#include <net/netfilter/nf_tables.h>
struct nft_counter {
- u64 bytes;
- u64 packets;
-};
-
-struct nft_counter_percpu {
- struct nft_counter counter;
- struct u64_stats_sync syncp;
+ s64 bytes;
+ s64 packets;
};
struct nft_counter_percpu_priv {
- struct nft_counter_percpu __percpu *counter;
+ struct nft_counter __percpu *counter;
};
-static void nft_counter_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static DEFINE_PER_CPU(seqcount_t, nft_counter_seq);
+
+static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- struct nft_counter_percpu *this_cpu;
+ struct nft_counter *this_cpu;
+ seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- u64_stats_update_begin(&this_cpu->syncp);
- this_cpu->counter.bytes += pkt->skb->len;
- this_cpu->counter.packets++;
- u64_stats_update_end(&this_cpu->syncp);
+ myseq = this_cpu_ptr(&nft_counter_seq);
+
+ write_seqcount_begin(myseq);
+
+ this_cpu->bytes += pkt->skb->len;
+ this_cpu->packets++;
+
+ write_seqcount_end(myseq);
local_bh_enable();
}
-static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+static inline void nft_counter_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ nft_counter_do_eval(priv, regs, pkt);
+}
+
+static int nft_counter_do_init(const struct nlattr * const tb[],
+ struct nft_counter_percpu_priv *priv)
+{
+ struct nft_counter __percpu *cpu_stats;
+ struct nft_counter *this_cpu;
+
+ cpu_stats = alloc_percpu(struct nft_counter);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ if (tb[NFTA_COUNTER_PACKETS]) {
+ this_cpu->packets =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ }
+ if (tb[NFTA_COUNTER_BYTES]) {
+ this_cpu->bytes =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ }
+ preempt_enable();
+ priv->counter = cpu_stats;
+ return 0;
+}
+
+static int nft_counter_obj_init(const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ return nft_counter_do_init(tb, priv);
+}
+
+static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv)
+{
+ free_percpu(priv->counter);
+}
+
+static void nft_counter_obj_destroy(struct nft_object *obj)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ nft_counter_do_destroy(priv);
+}
+
+static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
struct nft_counter *total)
{
- const struct nft_counter_percpu *cpu_stats;
+ struct nft_counter *this_cpu;
+
+ local_bh_disable();
+ this_cpu = this_cpu_ptr(priv->counter);
+ this_cpu->packets -= total->packets;
+ this_cpu->bytes -= total->bytes;
+ local_bh_enable();
+}
+
+static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
+ struct nft_counter *total)
+{
+ struct nft_counter *this_cpu;
+ const seqcount_t *myseq;
u64 bytes, packets;
unsigned int seq;
int cpu;
memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- cpu_stats = per_cpu_ptr(counter, cpu);
+ myseq = per_cpu_ptr(&nft_counter_seq, cpu);
+ this_cpu = per_cpu_ptr(priv->counter, cpu);
do {
- seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
- bytes = cpu_stats->counter.bytes;
- packets = cpu_stats->counter.packets;
- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+ seq = read_seqcount_begin(myseq);
+ bytes = this_cpu->bytes;
+ packets = this_cpu->packets;
+ } while (read_seqcount_retry(myseq, seq));
- total->packets += packets;
- total->bytes += bytes;
+ total->bytes += bytes;
+ total->packets += packets;
}
}
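/*
 * The switch from u64 to s64 counters is what makes dump-and-reset
 * race-tolerant: the reset path subtracts the dumped totals on the
 * local cpu only, which may drive that cpu's counter negative, but the
 * cross-cpu sum computed here stays correct for packets counted while
 * the dump was in flight.
 */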
-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_counter_do_dump(struct sk_buff *skb,
+ struct nft_counter_percpu_priv *priv,
+ bool reset)
{
- struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
struct nft_counter total;
- nft_counter_fetch(priv->counter, &total);
+ nft_counter_fetch(priv, &total);
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
NFTA_COUNTER_PAD) ||
nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets),
NFTA_COUNTER_PAD))
goto nla_put_failure;
+
+ if (reset)
+ nft_counter_reset(priv, &total);
+
return 0;
nla_put_failure:
return -1;
}
+static int nft_counter_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ return nft_counter_do_dump(skb, priv, reset);
+}
+
static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
+static struct nft_object_type nft_counter_obj __read_mostly = {
+ .type = NFT_OBJECT_COUNTER,
+ .size = sizeof(struct nft_counter_percpu_priv),
+ .maxattr = NFTA_COUNTER_MAX,
+ .policy = nft_counter_policy,
+ .eval = nft_counter_obj_eval,
+ .init = nft_counter_obj_init,
+ .destroy = nft_counter_obj_destroy,
+ .dump = nft_counter_obj_dump,
+ .owner = THIS_MODULE,
+};
+
+static void nft_counter_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+
+ nft_counter_do_eval(priv, regs, pkt);
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+
+ return nft_counter_do_dump(skb, priv, false);
+}
+
static int nft_counter_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- struct nft_counter_percpu __percpu *cpu_stats;
- struct nft_counter_percpu *this_cpu;
-
- cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
- if (cpu_stats == NULL)
- return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
- if (tb[NFTA_COUNTER_PACKETS]) {
- this_cpu->counter.packets =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
- }
- if (tb[NFTA_COUNTER_BYTES]) {
- this_cpu->counter.bytes =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
- }
- preempt_enable();
- priv->counter = cpu_stats;
- return 0;
+ return nft_counter_do_init(tb, priv);
}
static void nft_counter_destroy(const struct nft_ctx *ctx,
@@ -124,28 +216,27 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- free_percpu(priv->counter);
+ nft_counter_do_destroy(priv);
}
static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
- struct nft_counter_percpu __percpu *cpu_stats;
- struct nft_counter_percpu *this_cpu;
+ struct nft_counter __percpu *cpu_stats;
+ struct nft_counter *this_cpu;
struct nft_counter total;
- nft_counter_fetch(priv->counter, &total);
+ nft_counter_fetch(priv, &total);
- cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
- GFP_ATOMIC);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
if (cpu_stats == NULL)
return -ENOMEM;
preempt_disable();
this_cpu = this_cpu_ptr(cpu_stats);
- this_cpu->counter.packets = total.packets;
- this_cpu->counter.bytes = total.bytes;
+ this_cpu->packets = total.packets;
+ this_cpu->bytes = total.bytes;
preempt_enable();
priv_clone->counter = cpu_stats;
@@ -174,12 +265,29 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
static int __init nft_counter_module_init(void)
{
- return nft_register_expr(&nft_counter_type);
+ int cpu, err;
+
+ for_each_possible_cpu(cpu)
+ seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
+
+ err = nft_register_obj(&nft_counter_obj);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_counter_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_counter_obj);
+ return err;
}
static void __exit nft_counter_module_exit(void)
{
nft_unregister_expr(&nft_counter_type);
+ nft_unregister_obj(&nft_counter_obj);
}
module_init(nft_counter_module_init);
@@ -188,3 +296,4 @@ module_exit(nft_counter_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("counter");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER);
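/*
 * With both registrations in place, the same per-cpu counter code now
 * backs anonymous rule counters and named, rule-independent objects
 * created via NFT_MSG_NEWOBJ and referenced by name, e.g. from set
 * elements through NFT_SET_EXT_OBJREF as introduced above.
 */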
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index d7b0d171172a..e6baeaebe653 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -207,37 +208,37 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
[NFTA_CT_SREG] = { .type = NLA_U32 },
};
-static int nft_ct_l3proto_try_module_get(uint8_t family)
+static int nft_ct_netns_get(struct net *net, uint8_t family)
{
int err;
if (family == NFPROTO_INET) {
- err = nf_ct_l3proto_try_module_get(NFPROTO_IPV4);
+ err = nf_ct_netns_get(net, NFPROTO_IPV4);
if (err < 0)
goto err1;
- err = nf_ct_l3proto_try_module_get(NFPROTO_IPV6);
+ err = nf_ct_netns_get(net, NFPROTO_IPV6);
if (err < 0)
goto err2;
} else {
- err = nf_ct_l3proto_try_module_get(family);
+ err = nf_ct_netns_get(net, family);
if (err < 0)
goto err1;
}
return 0;
err2:
- nf_ct_l3proto_module_put(NFPROTO_IPV4);
+ nf_ct_netns_put(net, NFPROTO_IPV4);
err1:
return err;
}
-static void nft_ct_l3proto_module_put(uint8_t family)
+static void nft_ct_netns_put(struct net *net, uint8_t family)
{
if (family == NFPROTO_INET) {
- nf_ct_l3proto_module_put(NFPROTO_IPV4);
- nf_ct_l3proto_module_put(NFPROTO_IPV6);
+ nf_ct_netns_put(net, NFPROTO_IPV4);
+ nf_ct_netns_put(net, NFPROTO_IPV6);
} else
- nf_ct_l3proto_module_put(family);
+ nf_ct_netns_put(net, family);
}
static int nft_ct_get_init(const struct nft_ctx *ctx,
@@ -341,7 +342,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+ err = nft_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
return err;
@@ -389,7 +390,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
if (err < 0)
goto err1;
- err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+ err = nft_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
goto err1;
@@ -404,7 +405,7 @@ err1:
static void nft_ct_get_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
- nft_ct_l3proto_module_put(ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
}
static void nft_ct_set_destroy(const struct nft_ctx *ctx,
@@ -422,7 +423,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
break;
}
- nft_ct_l3proto_module_put(ctx->afi->family);
+ nft_ct_netns_put(ctx->net, ctx->afi->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -518,15 +519,61 @@ static struct nft_expr_type nft_ct_type __read_mostly = {
.owner = THIS_MODULE,
};
+static void nft_notrack_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct sk_buff *skb = pkt->skb;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ /* Previously seen (loopback or untracked)? Ignore. */
+ if (ct)
+ return;
+
+ ct = nf_ct_untracked_get();
+ atomic_inc(&ct->ct_general.use);
+ skb->nfct = &ct->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+}
+
+static struct nft_expr_type nft_notrack_type;
+static const struct nft_expr_ops nft_notrack_ops = {
+ .type = &nft_notrack_type,
+ .size = NFT_EXPR_SIZE(0),
+ .eval = nft_notrack_eval,
+};
+
+static struct nft_expr_type nft_notrack_type __read_mostly = {
+ .name = "notrack",
+ .ops = &nft_notrack_ops,
+ .owner = THIS_MODULE,
+};
+
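/*
 * The notrack expression carries no attributes (NFT_EXPR_SIZE(0)): it
 * merely pins the shared untracked conntrack template to the skb so
 * that later conntrack hooks skip the packet; packets that already
 * carry a conntrack entry are left alone.
 */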
static int __init nft_ct_module_init(void)
{
+ int err;
+
BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
- return nft_register_expr(&nft_ct_type);
+ err = nft_register_expr(&nft_ct_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_notrack_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_expr(&nft_ct_type);
+ return err;
}
static void __exit nft_ct_module_exit(void)
{
+ nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
@@ -536,3 +583,4 @@ module_exit(nft_ct_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("ct");
+MODULE_ALIAS_NFT_EXPR("notrack");
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 31ca94793aa9..049ad2d9ee66 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -98,7 +98,8 @@ out:
}
static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
- [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_DYNSET_SET_ID] = { .type = NLA_U32 },
[NFTA_DYNSET_OP] = { .type = NLA_U32 },
[NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 },
@@ -268,7 +269,6 @@ nla_put_failure:
return -1;
}
-static struct nft_expr_type nft_dynset_type;
static const struct nft_expr_ops nft_dynset_ops = {
.type = &nft_dynset_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
@@ -278,20 +278,10 @@ static const struct nft_expr_ops nft_dynset_ops = {
.dump = nft_dynset_dump,
};
-static struct nft_expr_type nft_dynset_type __read_mostly = {
+struct nft_expr_type nft_dynset_type __read_mostly = {
.name = "dynset",
.ops = &nft_dynset_ops,
.policy = nft_dynset_policy,
.maxattr = NFTA_DYNSET_MAX,
.owner = THIS_MODULE,
};
-
-int __init nft_dynset_module_init(void)
-{
- return nft_register_expr(&nft_dynset_type);
-}
-
-void nft_dynset_module_exit(void)
-{
- nft_unregister_expr(&nft_dynset_type);
-}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
new file mode 100644
index 000000000000..29a4906adc27
--- /dev/null
+++ b/net/netfilter/nft_fib.c
@@ -0,0 +1,159 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
+ [NFTA_FIB_DREG] = { .type = NLA_U32 },
+ [NFTA_FIB_RESULT] = { .type = NLA_U32 },
+ [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
+};
+EXPORT_SYMBOL(nft_fib_policy);
+
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF: /* fallthrough */
+ case NFT_FIB_RESULT_OIFNAME:
+ hooks = (1 << NF_INET_PRE_ROUTING);
+ break;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ if (priv->flags & NFTA_FIB_F_IIF)
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ else
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+EXPORT_SYMBOL_GPL(nft_fib_validate);
+
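/*
 * Validating hooks at load time means an oif/oifname lookup is only
 * accepted in prerouting chains, while an address-type query with
 * neither iif nor oif constrained loads into any of the five inet
 * hooks; in nft syntax that would be, roughly, "fib daddr oif" versus
 * "fib daddr type" (illustrative only, the userspace grammar is not
 * part of this patch).
 */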
+int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_fib *priv = nft_expr_priv(expr);
+ unsigned int len;
+ int err;
+
+ if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS])
+ return -EINVAL;
+
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
+
+ if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+ return -EINVAL;
+
+ if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
+ (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR))
+ return -EINVAL;
+ if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) ==
+ (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF))
+ return -EINVAL;
+ if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
+ return -EINVAL;
+
+ priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+ priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ if (priv->flags & NFTA_FIB_F_OIF)
+ return -EINVAL;
+ len = sizeof(int);
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ if (priv->flags & NFTA_FIB_F_OIF)
+ return -EINVAL;
+ len = IFNAMSIZ;
+ break;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ len = sizeof(u32);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
+ if (err < 0)
+ return err;
+
+ return nft_fib_validate(ctx, expr, NULL);
+}
+EXPORT_SYMBOL_GPL(nft_fib_init);
+
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg))
+ return -1;
+
+ if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result)))
+ return -1;
+
+ if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags)))
+ return -1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fib_dump);
+
+void nft_fib_store_result(void *reg, enum nft_fib_result r,
+ const struct nft_pktinfo *pkt, int index)
+{
+ struct net_device *dev;
+ u32 *dreg = reg;
+
+ switch (r) {
+ case NFT_FIB_RESULT_OIF:
+ *dreg = index;
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ dev = dev_get_by_index_rcu(nft_net(pkt), index);
+ strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ *dreg = 0;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nft_fib_store_result);
+
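/*
 * dev_get_by_index_rcu() requires an RCU read-side section, which
 * holds here because expression eval runs from the netfilter hooks.
 * The OIFNAME case copies up to IFNAMSIZ bytes into the destination
 * register, matching the len = IFNAMSIZ reservation made in
 * nft_fib_init() above.
 */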
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
new file mode 100644
index 000000000000..9120fc7228f4
--- /dev/null
+++ b/net/netfilter/nft_fib_inet.c
@@ -0,0 +1,82 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_inet_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib4_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib4_eval_type(expr, regs, pkt);
+ }
+ break;
+ case NFPROTO_IPV6:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib6_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib6_eval_type(expr, regs, pkt);
+ }
+ break;
+ }
+
+ regs->verdict.code = NF_DROP;
+}
+
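/*
 * The inet wrapper only dispatches on nft_pf(pkt) to the ipv4/ipv6
 * backends; any unhandled family/result combination falls through to
 * NF_DROP rather than silently accepting the packet.
 */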
+static struct nft_expr_type nft_fib_inet_type;
+static const struct nft_expr_ops nft_fib_inet_ops = {
+ .type = &nft_fib_inet_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib_inet_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_inet_type __read_mostly = {
+ .family = NFPROTO_INET,
+ .name = "fib",
+ .ops = &nft_fib_inet_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib_inet_module_init(void)
+{
+ return nft_register_expr(&nft_fib_inet_type);
+}
+
+static void __exit nft_fib_inet_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_inet_type);
+}
+
+module_init(nft_fib_inet_module_init);
+module_exit(nft_fib_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 763ebc3e0b2b..ce13a50b9189 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -26,8 +26,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
int oif = regs->data[priv->sreg_dev];
- nf_dup_netdev_egress(pkt, oif);
- regs->verdict.code = NF_DROP;
+ nf_fwd_netdev_egress(pkt, oif);
+ regs->verdict.code = NF_STOLEN;
}
static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index d5447a22275c..eb2721af898d 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -58,7 +58,6 @@ static int nft_hash_init(const struct nft_ctx *ctx,
if (!tb[NFTA_HASH_SREG] ||
!tb[NFTA_HASH_DREG] ||
!tb[NFTA_HASH_LEN] ||
- !tb[NFTA_HASH_SEED] ||
!tb[NFTA_HASH_MODULUS])
return -EINVAL;
@@ -83,7 +82,10 @@ static int nft_hash_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+ if (tb[NFTA_HASH_SEED])
+ priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+ else
+ get_random_bytes(&priv->seed, sizeof(priv->seed));
return nft_validate_register_load(priv->sreg, len) &&
nft_validate_register_store(ctx, priv->dreg, NULL,
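
With NFTA_HASH_SEED now optional, a rule that omits the seed gets a kernel-generated random one, so the hash mapping is not guessable by default. A runnable userspace sketch of the same fallback pattern; rand() stands in for get_random_bytes() purely for illustration:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static uint32_t pick_seed(const uint32_t *user_seed)
{
    if (user_seed)                      /* NFTA_HASH_SEED was supplied */
        return *user_seed;
    srand((unsigned)time(NULL));        /* stand-in for get_random_bytes() */
    return (uint32_t)rand();
}

int main(void)
{
    uint32_t fixed = 0xdeadbeef;

    printf("explicit seed: %#x\n", pick_seed(&fixed));
    printf("random seed:   %#x\n", pick_seed(NULL));
    return 0;
}

One consequence worth noting: a randomly seeded hash expression maps the same flows differently across reboots, which is fine for load distribution but not for stable, reproducible mappings.
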
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index d17018ff54e6..728baf88295a 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -54,9 +54,6 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- if (desc.len > U8_MAX)
- return -ERANGE;
-
priv->dlen = desc.len;
priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
@@ -105,7 +102,6 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
return 0;
}
-static struct nft_expr_type nft_imm_type;
static const struct nft_expr_ops nft_imm_ops = {
.type = &nft_imm_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -116,20 +112,10 @@ static const struct nft_expr_ops nft_imm_ops = {
.validate = nft_immediate_validate,
};
-static struct nft_expr_type nft_imm_type __read_mostly = {
+struct nft_expr_type nft_imm_type __read_mostly = {
.name = "immediate",
.ops = &nft_imm_ops,
.policy = nft_immediate_policy,
.maxattr = NFTA_IMMEDIATE_MAX,
.owner = THIS_MODULE,
};
-
-int __init nft_immediate_module_init(void)
-{
- return nft_register_expr(&nft_imm_type);
-}
-
-void nft_immediate_module_exit(void)
-{
- nft_unregister_expr(&nft_imm_type);
-}
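
nft_immediate loses its private module init/exit and exports nft_imm_type; nft_lookup, nft_payload and nft_range below get the same treatment. Presumably these built-in expressions are now registered in a single pass by the nf_tables core; a hedged sketch of what that registration plausibly looks like (the array and function names are assumptions, since the core-side hunk is not shown here):

static struct nft_expr_type *nft_basic_types[] = {
        &nft_imm_type,
        &nft_lookup_type,
        &nft_payload_type,
        &nft_range_type,
};

int __init nf_tables_core_module_init(void)
{
        int i, err;

        for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) {
                err = nft_register_expr(nft_basic_types[i]);
                if (err)
                        goto err;
        }
        return 0;
err:
        while (i-- > 0)
                nft_unregister_expr(nft_basic_types[i]);
        return err;
}

Registering the basic expressions from the core removes a handful of boilerplate module bodies and guarantees they are available whenever nf_tables itself is.
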
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 1b01404bb33f..6f6e64423643 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -32,13 +32,15 @@ static void nft_log_eval(const struct nft_expr *expr,
{
const struct nft_log *priv = nft_expr_priv(expr);
- nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
- pkt->out, &priv->loginfo, "%s", priv->prefix);
+ nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
+ nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s",
+ priv->prefix);
}
static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
[NFTA_LOG_GROUP] = { .type = NLA_U16 },
- [NFTA_LOG_PREFIX] = { .type = NLA_STRING },
+ [NFTA_LOG_PREFIX] = { .type = NLA_STRING,
+ .len = NF_LOG_PREFIXLEN - 1 },
[NFTA_LOG_SNAPLEN] = { .type = NLA_U32 },
[NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 },
[NFTA_LOG_LEVEL] = { .type = NLA_U32 },
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 8166b6994cc7..e21aea7e5ec8 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -35,9 +35,8 @@ static void nft_lookup_eval(const struct nft_expr *expr,
const struct nft_set_ext *ext;
bool found;
- found = set->ops->lookup(pkt->net, set, &regs->data[priv->sreg], &ext) ^
- priv->invert;
-
+ found = set->ops->lookup(nft_net(pkt), set, &regs->data[priv->sreg],
+ &ext) ^ priv->invert;
if (!found) {
regs->verdict.code = NFT_BREAK;
return;
@@ -50,7 +49,8 @@ static void nft_lookup_eval(const struct nft_expr *expr,
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
- [NFTA_LOOKUP_SET] = { .type = NLA_STRING },
+ [NFTA_LOOKUP_SET] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
@@ -155,7 +155,6 @@ nla_put_failure:
return -1;
}
-static struct nft_expr_type nft_lookup_type;
static const struct nft_expr_ops nft_lookup_ops = {
.type = &nft_lookup_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
@@ -165,20 +164,10 @@ static const struct nft_expr_ops nft_lookup_ops = {
.dump = nft_lookup_dump,
};
-static struct nft_expr_type nft_lookup_type __read_mostly = {
+struct nft_expr_type nft_lookup_type __read_mostly = {
.name = "lookup",
.ops = &nft_lookup_ops,
.policy = nft_lookup_policy,
.maxattr = NFTA_LOOKUP_MAX,
.owner = THIS_MODULE,
};
-
-int __init nft_lookup_module_init(void)
-{
- return nft_register_expr(&nft_lookup_type);
-}
-
-void nft_lookup_module_exit(void)
-{
- nft_unregister_expr(&nft_lookup_type);
-}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 81b5ad6165ac..11ce016cd479 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -77,7 +77,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
}
}
- return 0;
+ return nf_ct_netns_get(ctx->net, ctx->afi->family);
}
EXPORT_SYMBOL_GPL(nft_masq_init);
@@ -105,4 +105,4 @@ nla_put_failure:
EXPORT_SYMBOL_GPL(nft_masq_dump);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 6c1e0246706e..66c7f4b4c49b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -36,7 +36,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
- const struct net_device *in = pkt->in, *out = pkt->out;
+ const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
struct sock *sk;
u32 *dest = &regs->data[priv->dreg];
@@ -49,7 +49,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*(__be16 *)dest = skb->protocol;
break;
case NFT_META_NFPROTO:
- *dest = pkt->pf;
+ *dest = nft_pf(pkt);
break;
case NFT_META_L4PROTO:
if (!pkt->tprot_set)
@@ -146,7 +146,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
}
- switch (pkt->pf) {
+ switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
if (ipv4_is_multicast(ip_hdr(skb)->daddr))
*dest = PACKET_MULTICAST;
@@ -310,6 +310,11 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
case NFPROTO_NETDEV:
hooks = 1 << NF_NETDEV_INGRESS;
break;
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = 1 << NF_INET_PRE_ROUTING;
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index ee2d71753746..19a7bf3236f9 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -209,7 +209,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(ctx->net, family);
}
static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -257,12 +257,21 @@ nla_put_failure:
return -1;
}
+static void
+nft_nat_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ const struct nft_nat *priv = nft_expr_priv(expr);
+
+ nf_ct_netns_put(ctx->net, priv->family);
+}
+
static struct nft_expr_type nft_nat_type;
static const struct nft_expr_ops nft_nat_ops = {
.type = &nft_nat_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
.eval = nft_nat_eval,
.init = nft_nat_init,
+ .destroy = nft_nat_destroy,
.dump = nft_nat_dump,
.validate = nft_nat_validate,
};
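
nft_nat now takes a per-namespace conntrack reference in init() and releases it in the new destroy() hook; nft_masq and nft_redir get the same init-side change. A runnable userspace analogue of the get/put pairing, showing why the last put is what actually tears the hooks down:

#include <stdio.h>

struct netns { int ct_users; };

static int ct_netns_get(struct netns *net)
{
    if (net->ct_users++ == 0)
        printf("register conntrack hooks in this netns\n");
    return 0;
}

static void ct_netns_put(struct netns *net)
{
    if (--net->ct_users == 0)
        printf("unregister conntrack hooks in this netns\n");
}

int main(void)
{
    struct netns net = { 0 };

    ct_netns_get(&net);    /* first NAT rule: hooks come up */
    ct_netns_get(&net);    /* second rule: refcount only */
    ct_netns_put(&net);    /* one rule deleted */
    ct_netns_put(&net);    /* last user gone: hooks come down */
    return 0;
}

The destroy() hook matters because an expression that only took the reference in init() would leak it on rule deletion and keep conntrack registered in the namespace forever.
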
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 55bc5ab78d4a..a66b36097b8f 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -65,7 +65,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
return -EOVERFLOW;
priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
- atomic_set(&priv->counter, 0);
+ atomic_set(&priv->counter, priv->modulus - 1);
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, sizeof(u32));
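
Seeding the counter with modulus - 1 rather than 0 makes the very first increment wrap around, so the generated sequence starts at 0 as users would expect. A runnable sketch, assuming eval computes something like atomic_inc_return(&counter) % modulus:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t modulus = 4;
    uint32_t counter = modulus - 1;     /* the new initial value */

    for (int pkt = 0; pkt < 6; pkt++)
        printf("packet %d -> %u\n", pkt, ++counter % modulus);
    /* prints 0 1 2 3 0 1: the sequence now begins at zero */
    return 0;
}
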
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
new file mode 100644
index 000000000000..1ae8c49ca4a1
--- /dev/null
+++ b/net/netfilter/nft_objref.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2012-2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr))
+
+static void nft_objref_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ obj->type->eval(obj, regs, pkt);
+}
+
+static int nft_objref_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ u32 objtype;
+
+ if (!tb[NFTA_OBJREF_IMM_NAME] ||
+ !tb[NFTA_OBJREF_IMM_TYPE])
+ return -EINVAL;
+
+ objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
+ obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
+ genmask);
+ if (IS_ERR(obj))
+ return -ENOENT;
+
+ nft_objref_priv(expr) = obj;
+ obj->use++;
+
+ return 0;
+}
+
+static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_object *obj = nft_objref_priv(expr);
+
+ if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
+ nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_objref_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ obj->use--;
+}
+
+static struct nft_expr_type nft_objref_type;
+static const struct nft_expr_ops nft_objref_ops = {
+ .type = &nft_objref_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
+ .eval = nft_objref_eval,
+ .init = nft_objref_init,
+ .destroy = nft_objref_destroy,
+ .dump = nft_objref_dump,
+};
+
+struct nft_objref_map {
+ struct nft_set *set;
+ enum nft_registers sreg:8;
+ struct nft_set_binding binding;
+};
+
+static void nft_objref_map_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+ const struct nft_set *set = priv->set;
+ const struct nft_set_ext *ext;
+ struct nft_object *obj;
+ bool found;
+
+ found = set->ops->lookup(nft_net(pkt), set, &regs->data[priv->sreg],
+ &ext);
+ if (!found) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ obj = *nft_set_ext_obj(ext);
+ obj->type->eval(obj, regs, pkt);
+}
+
+static int nft_objref_map_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set *set;
+ int err;
+
+ set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask);
+ if (IS_ERR(set)) {
+ if (tb[NFTA_OBJREF_SET_ID]) {
+ set = nf_tables_set_lookup_byid(ctx->net,
+ tb[NFTA_OBJREF_SET_ID],
+ genmask);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
+
+ if (!(set->flags & NFT_SET_OBJECT))
+ return -EINVAL;
+
+ priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]);
+ err = nft_validate_register_load(priv->sreg, set->klen);
+ if (err < 0)
+ return err;
+
+ priv->binding.flags = set->flags & NFT_SET_OBJECT;
+
+ err = nf_tables_bind_set(ctx, set, &priv->binding);
+ if (err < 0)
+ return err;
+
+ priv->set = set;
+ return 0;
+}
+
+static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_objref_map *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_OBJREF_SET_SREG, priv->sreg) ||
+ nla_put_string(skb, NFTA_OBJREF_SET_NAME, priv->set->name))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_objref_map_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
+static struct nft_expr_type nft_objref_type;
+static const struct nft_expr_ops nft_objref_map_ops = {
+ .type = &nft_objref_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
+ .eval = nft_objref_map_eval,
+ .init = nft_objref_map_init,
+ .destroy = nft_objref_map_destroy,
+ .dump = nft_objref_map_dump,
+};
+
+static const struct nft_expr_ops *
+nft_objref_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_OBJREF_SET_SREG] &&
+ (tb[NFTA_OBJREF_SET_NAME] ||
+ tb[NFTA_OBJREF_SET_ID]))
+ return &nft_objref_map_ops;
+ else if (tb[NFTA_OBJREF_IMM_NAME] &&
+ tb[NFTA_OBJREF_IMM_TYPE])
+ return &nft_objref_ops;
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = {
+ [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING,
+ .len = NFT_OBJ_MAXNAMELEN - 1 },
+ [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 },
+ [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 },
+ [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
+ [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 },
+};
+
+static struct nft_expr_type nft_objref_type __read_mostly = {
+ .name = "objref",
+ .select_ops = nft_objref_select_ops,
+ .policy = nft_objref_policy,
+ .maxattr = NFTA_OBJREF_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_objref_module_init(void)
+{
+ return nft_register_expr(&nft_objref_type);
+}
+
+static void __exit nft_objref_module_exit(void)
+{
+ nft_unregister_expr(&nft_objref_type);
+}
+
+module_init(nft_objref_module_init);
+module_exit(nft_objref_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("objref");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index b2f88617611a..7d699bbd45b0 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -17,6 +18,10 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmpv6.h>
/* add the vlan header back into the user buffer if the tag was removed by offloads */
static bool
@@ -148,7 +153,6 @@ nla_put_failure:
return -1;
}
-static struct nft_expr_type nft_payload_type;
static const struct nft_expr_ops nft_payload_ops = {
.type = &nft_payload_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
@@ -165,6 +169,103 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.dump = nft_payload_dump,
};
+static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
+{
+ *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum));
+ if (*sum == 0)
+ *sum = CSUM_MANGLED_0;
+}
+
+static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
+{
+ struct udphdr *uh, _uh;
+
+ uh = skb_header_pointer(skb, thoff, sizeof(_uh), &_uh);
+ if (!uh)
+ return false;
+
+ return uh->check;
+}
+
+static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ unsigned int *l4csum_offset)
+{
+ switch (pkt->tprot) {
+ case IPPROTO_TCP:
+ *l4csum_offset = offsetof(struct tcphdr, check);
+ break;
+ case IPPROTO_UDP:
+ if (!nft_payload_udp_checksum(skb, pkt->xt.thoff))
+ return -1;
+ /* Fall through. */
+ case IPPROTO_UDPLITE:
+ *l4csum_offset = offsetof(struct udphdr, check);
+ break;
+ case IPPROTO_ICMPV6:
+ *l4csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
+ break;
+ default:
+ return -1;
+ }
+
+ *l4csum_offset += pkt->xt.thoff;
+ return 0;
+}
+
+static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ __wsum fsum, __wsum tsum)
+{
+ int l4csum_offset;
+ __sum16 sum;
+
+ /* If we cannot determine the layer 4 checksum offset, or this packet
+ * doesn't require a layer 4 checksum recalculation, skip it.
+ */
+ if (nft_payload_l4csum_offset(pkt, skb, &l4csum_offset) < 0)
+ return 0;
+
+ if (skb_copy_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
+ return -1;
+
+ /* Checksum mangling for an arbitrary number of bytes, based on
+ * inet_proto_csum_replace*() functions.
+ */
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ nft_csum_replace(&sum, fsum, tsum);
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ skb->csum = ~csum_add(csum_sub(~(skb->csum), fsum),
+ tsum);
+ }
+ } else {
+ sum = ~csum_fold(csum_add(csum_sub(csum_unfold(sum), fsum),
+ tsum));
+ }
+
+ if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) ||
+ skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src,
+ __wsum fsum, __wsum tsum, int csum_offset)
+{
+ __sum16 sum;
+
+ if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ return -1;
+
+ nft_csum_replace(&sum, fsum, tsum);
+ if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+ skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ return -1;
+
+ return 0;
+}
+
static void nft_payload_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -174,7 +275,6 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
const u32 *src = &regs->data[priv->sreg];
int offset, csum_offset;
__wsum fsum, tsum;
- __sum16 sum;
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
@@ -197,21 +297,18 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
csum_offset = offset + priv->csum_offset;
offset += priv->offset;
- if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
+ if ((priv->csum_type == NFT_PAYLOAD_CSUM_INET || priv->csum_flags) &&
(priv->base != NFT_PAYLOAD_TRANSPORT_HEADER ||
skb->ip_summed != CHECKSUM_PARTIAL)) {
- if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
- goto err;
-
fsum = skb_checksum(skb, offset, priv->len, 0);
tsum = csum_partial(src, priv->len, 0);
- sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum),
- tsum));
- if (sum == 0)
- sum = CSUM_MANGLED_0;
- if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
- skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
+ nft_payload_csum_inet(skb, src, fsum, tsum, csum_offset))
+ goto err;
+
+ if (priv->csum_flags &&
+ nft_payload_l4csum_update(pkt, skb, fsum, tsum) < 0)
goto err;
}
@@ -241,6 +338,15 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
priv->csum_offset =
ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+ if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
+ u32 flags;
+
+ flags = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_FLAGS]));
+ if (flags & ~NFT_PAYLOAD_L4CSUM_PSEUDOHDR)
+ return -EINVAL;
+
+ priv->csum_flags = flags;
+ }
switch (priv->csum_type) {
case NFT_PAYLOAD_CSUM_NONE:
@@ -263,7 +369,8 @@ static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr
nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) ||
nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) ||
nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET,
- htonl(priv->csum_offset)))
+ htonl(priv->csum_offset)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_FLAGS, htonl(priv->csum_flags)))
goto nla_put_failure;
return 0;
@@ -320,20 +427,10 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
return &nft_payload_ops;
}
-static struct nft_expr_type nft_payload_type __read_mostly = {
+struct nft_expr_type nft_payload_type __read_mostly = {
.name = "payload",
.select_ops = nft_payload_select_ops,
.policy = nft_payload_policy,
.maxattr = NFTA_PAYLOAD_MAX,
.owner = THIS_MODULE,
};
-
-int __init nft_payload_module_init(void)
-{
- return nft_register_expr(&nft_payload_type);
-}
-
-void nft_payload_module_exit(void)
-{
- nft_unregister_expr(&nft_payload_type);
-}
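
The checksum path factored out above is the classic incremental Internet-checksum update: take the ones'-complement sum over the bytes being replaced (fsum) and the sum over the replacement bytes (tsum), then patch the stored checksum instead of recomputing it over the whole packet. A runnable userspace check of that arithmetic against a full recompute (RFC 1624, eqn. 3):

#include <stdint.h>
#include <stdio.h>

static uint16_t csum16(const uint8_t *buf, size_t len)
{
    uint32_t sum = 0;

    for (size_t i = 0; i < len; i += 2)
        sum += (uint32_t)buf[i] << 8 | buf[i + 1];
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;
}

int main(void)
{
    uint8_t pkt[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    uint16_t old = csum16(pkt, sizeof(pkt));
    uint16_t oldword = (uint16_t)(pkt[4] << 8 | pkt[5]);
    uint16_t newword = 0xaabb;
    uint32_t sum;

    pkt[4] = 0xaa;                      /* rewrite, as a payload set would */
    pkt[5] = 0xbb;

    /* incremental update: HC' = ~(~HC + ~m + m') */
    sum = (uint16_t)~old;
    sum += (uint16_t)~oldword;
    sum += newword;
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);

    printf("full recompute: %#06x, incremental: %#06x\n",
           csum16(pkt, sizeof(pkt)), (uint16_t)~sum);
    return 0;
}

The UDP special case exists because a zero UDP checksum means "no checksum" on IPv4: nft_payload_udp_checksum() skips the update when uh->check is zero, and CSUM_MANGLED_0 is substituted whenever a fixup would otherwise produce zero.
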
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 393d359a1889..dbb6aaff67ec 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -38,12 +38,12 @@ static void nft_queue_eval(const struct nft_expr *expr,
if (priv->queues_total > 1) {
if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) {
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
queue = priv->queuenum + cpu % priv->queues_total;
} else {
queue = nfqueue_hash(pkt->skb, queue,
- priv->queues_total, pkt->pf,
+ priv->queues_total, nft_pf(pkt),
jhash_initval);
}
}
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index c00104c07095..2d6fe3559912 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -17,38 +17,59 @@
struct nft_quota {
u64 quota;
- bool invert;
- atomic64_t remain;
+ unsigned long flags;
+ atomic64_t consumed;
};
static inline bool nft_overquota(struct nft_quota *priv,
- const struct nft_pktinfo *pkt)
+ const struct sk_buff *skb)
{
- return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0;
+ return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota;
}
-static void nft_quota_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static inline bool nft_quota_invert(struct nft_quota *priv)
{
- struct nft_quota *priv = nft_expr_priv(expr);
+ return priv->flags & NFT_QUOTA_F_INV;
+}
- if (nft_overquota(priv, pkt) ^ priv->invert)
+static inline void nft_quota_do_eval(struct nft_quota *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv))
regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
[NFTA_QUOTA_BYTES] = { .type = NLA_U64 },
[NFTA_QUOTA_FLAGS] = { .type = NLA_U32 },
+ [NFTA_QUOTA_CONSUMED] = { .type = NLA_U64 },
};
-static int nft_quota_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+#define NFT_QUOTA_DEPLETED_BIT 1 /* From NFT_QUOTA_F_DEPLETED. */
+
+static void nft_quota_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_quota *priv = nft_expr_priv(expr);
- u32 flags = 0;
- u64 quota;
+ struct nft_quota *priv = nft_obj_data(obj);
+ bool overquota;
+
+ overquota = nft_overquota(priv, pkt->skb);
+ if (overquota ^ nft_quota_invert(priv))
+ regs->verdict.code = NFT_BREAK;
+
+ if (overquota &&
+ !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
+ nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0,
+ NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
+}
+
+static int nft_quota_do_init(const struct nlattr * const tb[],
+ struct nft_quota *priv)
+{
+ unsigned long flags = 0;
+ u64 quota, consumed = 0;
if (!tb[NFTA_QUOTA_BYTES])
return -EINVAL;
@@ -57,34 +78,114 @@ static int nft_quota_init(const struct nft_ctx *ctx,
if (quota > S64_MAX)
return -EOVERFLOW;
+ if (tb[NFTA_QUOTA_CONSUMED]) {
+ consumed = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_CONSUMED]));
+ if (consumed > quota)
+ return -EINVAL;
+ }
+
if (tb[NFTA_QUOTA_FLAGS]) {
flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
if (flags & ~NFT_QUOTA_F_INV)
return -EINVAL;
+ if (flags & NFT_QUOTA_F_DEPLETED)
+ return -EOPNOTSUPP;
}
priv->quota = quota;
- priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false;
- atomic64_set(&priv->remain, quota);
+ priv->flags = flags;
+ atomic64_set(&priv->consumed, consumed);
return 0;
}
-static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_quota_obj_init(const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_quota *priv = nft_obj_data(obj);
+
+ return nft_quota_do_init(tb, priv);
+}
+
+static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
+ bool reset)
{
- const struct nft_quota *priv = nft_expr_priv(expr);
- u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
+ u64 consumed, consumed_cap;
+ u32 flags = priv->flags;
+
+ /* Since we unconditionally increment consumed quota for each packet
+ * that we see, don't go over the quota boundary in what we send to
+ * userspace.
+ */
+ consumed = atomic64_read(&priv->consumed);
+ if (consumed >= priv->quota) {
+ consumed_cap = priv->quota;
+ flags |= NFT_QUOTA_F_DEPLETED;
+ } else {
+ consumed_cap = consumed;
+ }
if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
NFTA_QUOTA_PAD) ||
+ nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed_cap),
+ NFTA_QUOTA_PAD) ||
nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
goto nla_put_failure;
+
+ if (reset) {
+ atomic64_sub(consumed, &priv->consumed);
+ clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags);
+ }
return 0;
nla_put_failure:
return -1;
}
+static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
+ bool reset)
+{
+ struct nft_quota *priv = nft_obj_data(obj);
+
+ return nft_quota_do_dump(skb, priv, reset);
+}
+
+static struct nft_object_type nft_quota_obj __read_mostly = {
+ .type = NFT_OBJECT_QUOTA,
+ .size = sizeof(struct nft_quota),
+ .maxattr = NFTA_QUOTA_MAX,
+ .policy = nft_quota_policy,
+ .init = nft_quota_obj_init,
+ .eval = nft_quota_obj_eval,
+ .dump = nft_quota_obj_dump,
+ .owner = THIS_MODULE,
+};
+
+static void nft_quota_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ nft_quota_do_eval(priv, regs, pkt);
+}
+
+static int nft_quota_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ return nft_quota_do_init(tb, priv);
+}
+
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ return nft_quota_do_dump(skb, priv, false);
+}
+
static struct nft_expr_type nft_quota_type;
static const struct nft_expr_ops nft_quota_ops = {
.type = &nft_quota_type,
@@ -105,12 +206,26 @@ static struct nft_expr_type nft_quota_type __read_mostly = {
static int __init nft_quota_module_init(void)
{
- return nft_register_expr(&nft_quota_type);
+ int err;
+
+ err = nft_register_obj(&nft_quota_obj);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_quota_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_quota_obj);
+ return err;
}
static void __exit nft_quota_module_exit(void)
{
- nft_unregister_expr(&nft_quota_type);
+ nft_unregister_expr(&nft_quota_type);
+ nft_unregister_obj(&nft_quota_obj);
}
module_init(nft_quota_module_init);
@@ -119,3 +234,4 @@ module_exit(nft_quota_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("quota");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA);
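
The quota rework flips the accounting direction: instead of decrementing a remainder, the expression adds each packet's length to a consumed counter and compares against the limit. That makes the state dumpable (capped at the limit), restorable via NFTA_QUOTA_CONSUMED, and resettable, and the depleted bit gives the stateful object a one-shot netlink notification. A runnable userspace sketch of the core accounting:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct quota {
    uint64_t quota;
    uint64_t consumed;
};

static bool overquota(struct quota *q, uint64_t pktlen)
{
    q->consumed += pktlen;              /* atomic64_add_return() in-kernel */
    return q->consumed >= q->quota;
}

static void dump(const struct quota *q)
{
    /* cap what userspace sees, as nft_quota_do_dump() does */
    uint64_t shown = q->consumed >= q->quota ? q->quota : q->consumed;

    printf("quota %llu consumed %llu%s\n",
           (unsigned long long)q->quota, (unsigned long long)shown,
           q->consumed >= q->quota ? " (depleted)" : "");
}

int main(void)
{
    struct quota q = { .quota = 1500, .consumed = 0 };

    for (int i = 0; i < 3; i++) {
        if (overquota(&q, 600))
            puts("verdict: NFT_BREAK");
        dump(&q);
    }
    return 0;
}
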
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 8f0aaaea1376..9edc74eedc10 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -128,7 +128,6 @@ nla_put_failure:
return -1;
}
-static struct nft_expr_type nft_range_type;
static const struct nft_expr_ops nft_range_ops = {
.type = &nft_range_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)),
@@ -137,20 +136,10 @@ static const struct nft_expr_ops nft_range_ops = {
.dump = nft_range_dump,
};
-static struct nft_expr_type nft_range_type __read_mostly = {
+struct nft_expr_type nft_range_type __read_mostly = {
.name = "range",
.ops = &nft_range_ops,
.policy = nft_range_policy,
.maxattr = NFTA_RANGE_MAX,
.owner = THIS_MODULE,
};
-
-int __init nft_range_module_init(void)
-{
- return nft_register_expr(&nft_range_type);
-}
-
-void nft_range_module_exit(void)
-{
- nft_unregister_expr(&nft_range_type);
-}
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 03f7bf40ae75..40dcd05146d5 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -79,7 +79,7 @@ int nft_redir_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(ctx->net, ctx->afi->family);
}
EXPORT_SYMBOL_GPL(nft_redir_init);
@@ -108,4 +108,4 @@ nla_put_failure:
EXPORT_SYMBOL_GPL(nft_redir_dump);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index e79d9ca2ffee..9e90a02cb104 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -23,36 +23,36 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
{
struct nft_reject *priv = nft_expr_priv(expr);
- switch (pkt->pf) {
+ switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
nf_send_unreach(pkt->skb, priv->icmp_code,
- pkt->hook);
+ nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset(pkt->net, pkt->skb, pkt->hook);
+ nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt));
break;
case NFT_REJECT_ICMPX_UNREACH:
nf_send_unreach(pkt->skb,
nft_reject_icmp_code(priv->icmp_code),
- pkt->hook);
+ nft_hook(pkt));
break;
}
break;
case NFPROTO_IPV6:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
- pkt->hook);
+ nf_send_unreach6(nft_net(pkt), pkt->skb,
+ priv->icmp_code, nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
+ nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt));
break;
case NFT_REJECT_ICMPX_UNREACH:
- nf_send_unreach6(pkt->net, pkt->skb,
+ nf_send_unreach6(nft_net(pkt), pkt->skb,
nft_reject_icmpv6_code(priv->icmp_code),
- pkt->hook);
+ nft_hook(pkt));
break;
}
break;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
new file mode 100644
index 000000000000..d3eb640bc784
--- /dev/null
+++ b/net/netfilter/nft_rt.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_rt {
+ enum nft_rt_keys key:8;
+ enum nft_registers dreg:8;
+};
+
+void nft_rt_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ u32 *dest = &regs->data[priv->dreg];
+ const struct dst_entry *dst;
+
+ dst = skb_dst(skb);
+ if (!dst)
+ goto err;
+
+ switch (priv->key) {
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ case NFT_RT_CLASSID:
+ *dest = dst->tclassid;
+ break;
+#endif
+ case NFT_RT_NEXTHOP4:
+ if (nft_pf(pkt) != NFPROTO_IPV4)
+ goto err;
+
+ *dest = rt_nexthop((const struct rtable *)dst,
+ ip_hdr(skb)->daddr);
+ break;
+ case NFT_RT_NEXTHOP6:
+ if (nft_pf(pkt) != NFPROTO_IPV6)
+ goto err;
+
+ memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
+ &ipv6_hdr(skb)->daddr),
+ sizeof(struct in6_addr));
+ break;
+ default:
+ WARN_ON(1);
+ goto err;
+ }
+ return;
+
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+ [NFTA_RT_DREG] = { .type = NLA_U32 },
+ [NFTA_RT_KEY] = { .type = NLA_U32 },
+};
+
+int nft_rt_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_rt *priv = nft_expr_priv(expr);
+ unsigned int len;
+
+ if (tb[NFTA_RT_KEY] == NULL ||
+ tb[NFTA_RT_DREG] == NULL)
+ return -EINVAL;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY]));
+ switch (priv->key) {
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ case NFT_RT_CLASSID:
+#endif
+ case NFT_RT_NEXTHOP4:
+ len = sizeof(u32);
+ break;
+ case NFT_RT_NEXTHOP6:
+ len = sizeof(struct in6_addr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
+}
+
+int nft_rt_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_RT_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_RT_DREG, priv->dreg))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_rt_type;
+static const struct nft_expr_ops nft_rt_get_ops = {
+ .type = &nft_rt_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)),
+ .eval = nft_rt_get_eval,
+ .init = nft_rt_get_init,
+ .dump = nft_rt_get_dump,
+};
+
+static struct nft_expr_type nft_rt_type __read_mostly = {
+ .name = "rt",
+ .ops = &nft_rt_get_ops,
+ .policy = nft_rt_policy,
+ .maxattr = NFTA_RT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_rt_module_init(void)
+{
+ return nft_register_expr(&nft_rt_type);
+}
+
+static void __exit nft_rt_module_exit(void)
+{
+ nft_unregister_expr(&nft_rt_type);
+}
+
+module_init(nft_rt_module_init);
+module_exit(nft_rt_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>");
+MODULE_ALIAS_NFT_EXPR("rt");
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index a3dface3e6e6..e36069fb76ae 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -167,6 +167,19 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
nft_set_elem_clear_busy(&he->ext);
}
+static bool nft_hash_deactivate_one(const struct net *net,
+ const struct nft_set *set, void *priv)
+{
+ struct nft_hash_elem *he = priv;
+
+ if (!nft_set_elem_mark_busy(&he->ext) ||
+ !nft_is_active(net, &he->ext)) {
+ nft_set_elem_change_active(net, set, &he->ext);
+ return true;
+ }
+ return false;
+}
+
static void *nft_hash_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
@@ -181,13 +194,10 @@ static void *nft_hash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL) {
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext))
- nft_set_elem_change_active(net, set, &he->ext);
- else
- he = NULL;
- }
+ if (he != NULL &&
+ !nft_hash_deactivate_one(net, set, he))
+ he = NULL;
+
rcu_read_unlock();
return he;
@@ -202,7 +212,7 @@ static void nft_hash_remove(const struct nft_set *set,
rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
}
-static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_hash *priv = nft_set_priv(set);
@@ -387,6 +397,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
+ .deactivate_one = nft_hash_deactivate_one,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.update = nft_hash_update,
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 36493a7cae88..f06f55ee516d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -171,6 +171,15 @@ static void nft_rbtree_activate(const struct net *net,
nft_set_elem_change_active(net, set, &rbe->ext);
}
+static bool nft_rbtree_deactivate_one(const struct net *net,
+ const struct nft_set *set, void *priv)
+{
+ struct nft_rbtree_elem *rbe = priv;
+
+ nft_set_elem_change_active(net, set, &rbe->ext);
+ return true;
+}
+
static void *nft_rbtree_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
@@ -204,7 +213,7 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
continue;
}
- nft_set_elem_change_active(net, set, &rbe->ext);
+ nft_rbtree_deactivate_one(net, set, rbe);
return rbe;
}
}
@@ -212,7 +221,7 @@ static void *nft_rbtree_deactivate(const struct net *net,
}
static void nft_rbtree_walk(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
struct nft_set_iter *iter)
{
const struct nft_rbtree *priv = nft_set_priv(set);
@@ -295,6 +304,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
.deactivate = nft_rbtree_deactivate,
+ .deactivate_one = nft_rbtree_deactivate_one,
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
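
Both set backends gain a deactivate_one() operation that deactivates a single element given its pointer, where deactivate() first has to look the element up by key. Presumably this exists so a caller that is already walking the set, such as a set-flush path, can deactivate elements directly; a hedged sketch of such a caller (the names are assumptions, the caller is not part of this hunk):

static int nft_flush_set_elem(const struct nft_ctx *ctx,
                              struct nft_set *set,
                              const struct nft_set_iter *iter,
                              const struct nft_set_elem *elem)
{
        if (!set->ops->deactivate_one(ctx->net, set, elem->priv))
                return -EBUSY;  /* already dying in this generation */

        /* ...queue an NFT_MSG_DELSETELEM transaction for commit... */
        return 0;
}

The walk callbacks losing their const on struct nft_set fits the same picture: a walker may now mutate set state rather than merely report it.
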
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index fc4977456c30..2ff499680cc6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+#define XT_PCPU_BLOCK_SIZE 4096
struct compat_delta {
unsigned int offset; /* offset in kernel */
@@ -958,7 +959,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
if (!info) {
- info = vmalloc(sz);
+ info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NORETRY | __GFP_HIGHMEM,
+ PAGE_KERNEL);
if (!info)
return NULL;
}
@@ -982,7 +985,7 @@ void xt_free_table_info(struct xt_table_info *info)
}
EXPORT_SYMBOL(xt_free_table_info);
-/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
+/* Find table by name, grabs mutex & ref. Returns NULL on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
{
@@ -1615,6 +1618,59 @@ void xt_proto_fini(struct net *net, u_int8_t af)
}
EXPORT_SYMBOL_GPL(xt_proto_fini);
+/**
+ * xt_percpu_counter_alloc - allocate x_tables rule counter
+ *
+ * @state: pointer to xt_percpu allocation state
+ * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
+ *
+ * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
+ * contain the address of the real (percpu) counter.
+ *
+ * Rule evaluation needs to use xt_get_this_cpu_counter() helper
+ * to fetch the real percpu counter.
+ *
+ * To speed up allocation and improve data locality, a 4 KiB block is
+ * allocated.
+ *
+ * xt_percpu_counter_alloc_state contains the base address of the
+ * allocated page and the current sub-offset.
+ *
+ * Returns false on error.
+ */
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+ struct xt_counters *counter)
+{
+ BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
+
+ if (nr_cpu_ids <= 1)
+ return true;
+
+ if (!state->mem) {
+ state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
+ XT_PCPU_BLOCK_SIZE);
+ if (!state->mem)
+ return false;
+ }
+ counter->pcnt = (__force unsigned long)(state->mem + state->off);
+ state->off += sizeof(*counter);
+ if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
+ state->mem = NULL;
+ state->off = 0;
+ }
+ return true;
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
+
+void xt_percpu_counter_free(struct xt_counters *counters)
+{
+ unsigned long pcnt = counters->pcnt;
+
+ if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
+ free_percpu((void __percpu *)pcnt);
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
+
static int __net_init xt_net_init(struct net *net)
{
int i;
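
xt_percpu_counter_alloc() turns what used to be one percpu allocation per rule into a bump allocator over 4 KiB percpu blocks: a block is allocated on demand, counters are carved out of it sequentially, and because the block is 4 KiB-aligned the free path can tell the block-owning counter (offset 0) apart from the rest. A runnable userspace analogue of the same scheme:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define BLOCK_SIZE 4096

struct counter { uint64_t packets, bytes; };

struct alloc_state {
    void *mem;
    unsigned int off;
};

static struct counter *counter_alloc(struct alloc_state *s)
{
    struct counter *c;

    if (!s->mem) {
        if (posix_memalign(&s->mem, BLOCK_SIZE, BLOCK_SIZE))
            return NULL;
        s->off = 0;
    }
    c = (struct counter *)((char *)s->mem + s->off);
    s->off += sizeof(*c);
    if (s->off > BLOCK_SIZE - sizeof(*c))
        s->mem = NULL;          /* block exhausted; next alloc starts fresh */
    return c;
}

static void counter_free(struct counter *c)
{
    /* only the counter at offset 0 owns the block */
    if (((uintptr_t)c & (BLOCK_SIZE - 1)) == 0)
        free(c);
}

int main(void)
{
    struct alloc_state st = { 0 };
    struct counter *a = counter_alloc(&st);
    struct counter *b = counter_alloc(&st);

    printf("a=%p b=%p share a block: %d\n", (void *)a, (void *)b,
           ((uintptr_t)a & ~(uintptr_t)(BLOCK_SIZE - 1)) ==
           ((uintptr_t)b & ~(uintptr_t)(BLOCK_SIZE - 1)));
    counter_free(b);            /* no-op: not block-aligned */
    counter_free(a);            /* releases the whole block */
    return 0;
}

This works in x_tables because a table's rule counters are always allocated and freed together, so freeing the offset-0 counter never strands a live neighbour.
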
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 4973cbddc446..19247a17e511 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -132,9 +132,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
goto errout;
audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
- info->type, par->hooknum, skb->len,
- par->in ? par->in->name : "?",
- par->out ? par->out->name : "?");
+ info->type, xt_hooknum(par), skb->len,
+ xt_in(par) ? xt_inname(par) : "?",
+ xt_out(par) ? xt_outname(par) : "?");
if (skb->mark)
audit_log_format(ab, " mark=%#x", skb->mark);
@@ -144,7 +144,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
ntohs(eth_hdr(skb)->h_proto));
- if (par->family == NFPROTO_BRIDGE) {
+ if (xt_family(par) == NFPROTO_BRIDGE) {
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
audit_ip4(ab, skb);
@@ -157,7 +157,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
}
}
- switch (par->family) {
+ switch (xt_family(par)) {
case NFPROTO_IPV4:
audit_ip4(ab, skb);
break;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index e04dc282e3bb..da56c06a443c 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -106,7 +106,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -115,7 +115,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_target connsecmark_tg_reg __read_mostly = {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 6669e68d589e..95c750358747 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -216,7 +216,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
goto err1;
#endif
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
goto err1;
@@ -260,7 +260,7 @@ out:
err3:
nf_ct_tmpl_free(ct);
err2:
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
err1:
return ret;
}
@@ -341,7 +341,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
if (help)
module_put(help->helper->me);
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
xt_ct_destroy_timeout(ct);
nf_ct_put(info->ct);
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index 1763ab82bcd7..c3b2017ebe41 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -32,15 +32,15 @@ static unsigned int
log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
+ struct net *net = xt_net(par);
struct nf_loginfo li;
- struct net *net = par->net;
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out,
- &li, "%s", loginfo->prefix);
+ nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
+ xt_out(par), &li, "%s", loginfo->prefix);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index b253e07cb1c5..e45a01255e70 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -33,8 +33,8 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
netmask.ip6[i] = ~(range->min_addr.ip6[i] ^
range->max_addr.ip6[i]);
- if (par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT)
+ if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+ xt_hooknum(par) == NF_INET_LOCAL_OUT)
new_addr.in6 = ipv6_hdr(skb)->daddr;
else
new_addr.in6 = ipv6_hdr(skb)->saddr;
@@ -51,7 +51,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
newrange.min_proto = range->min_proto;
newrange.max_proto = range->max_proto;
- return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
}
static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -60,7 +60,12 @@ static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
return -EINVAL;
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void netmap_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
static unsigned int
@@ -72,16 +77,16 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
struct nf_nat_range newrange;
- NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_POST_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT ||
- par->hooknum == NF_INET_LOCAL_IN);
+ NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+ xt_hooknum(par) == NF_INET_POST_ROUTING ||
+ xt_hooknum(par) == NF_INET_LOCAL_OUT ||
+ xt_hooknum(par) == NF_INET_LOCAL_IN);
ct = nf_ct_get(skb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
- if (par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT)
+ if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+ xt_hooknum(par) == NF_INET_LOCAL_OUT)
new_ip = ip_hdr(skb)->daddr & ~netmask;
else
new_ip = ip_hdr(skb)->saddr & ~netmask;
@@ -96,7 +101,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
newrange.max_proto = mr->range[0].max;
/* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
}
static int netmap_tg4_check(const struct xt_tgchk_param *par)
@@ -111,7 +116,7 @@ static int netmap_tg4_check(const struct xt_tgchk_param *par)
pr_debug("bad rangesize %u.\n", mr->rangesize);
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
}
static struct xt_target netmap_tg_reg[] __read_mostly = {
@@ -127,6 +132,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_LOCAL_IN),
.checkentry = netmap_tg6_checkentry,
+ .destroy = netmap_tg_destroy,
.me = THIS_MODULE,
},
{
@@ -141,6 +147,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_LOCAL_IN),
.checkentry = netmap_tg4_check,
+ .destroy = netmap_tg_destroy,
.me = THIS_MODULE,
},
};
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 8668a5c18dc3..c7f8958cea4a 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -25,8 +25,8 @@ static unsigned int
nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
+ struct net *net = xt_net(par);
struct nf_loginfo li;
- struct net *net = par->net;
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
@@ -37,8 +37,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (info->flags & XT_NFLOG_F_COPY_LEN)
li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
- nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in,
- par->out, &li, info->prefix);
+ nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb,
+ xt_in(par), xt_out(par), &li, info->prefix);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 8f1779ff7e30..a360b99a958a 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -43,7 +43,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
if (info->queues_total > 1) {
queue = nfqueue_hash(skb, queue, info->queues_total,
- par->family, jhash_initval);
+ xt_family(par), jhash_initval);
}
return NF_QUEUE_NR(queue);
}
@@ -98,7 +98,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
queue = info->queuenum + cpu % info->queues_total;
} else {
queue = nfqueue_hash(skb, queue, info->queues_total,
- par->family, jhash_initval);
+ xt_family(par), jhash_initval);
}
}
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dbd6c4a12b97..91a373a3f534 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -63,7 +63,7 @@ void xt_rateest_put(struct xt_rateest *est)
mutex_lock(&xt_rateest_mutex);
if (--est->refcnt == 0) {
hlist_del(&est->list);
- gen_kill_estimator(&est->bstats, &est->rstats);
+ gen_kill_estimator(&est->rate_est);
/*
* gen_estimator est_timer() might access est->lock or bstats,
* wait a RCU grace period before freeing 'est'
@@ -132,7 +132,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
cfg.est.interval = info->interval;
cfg.est.ewma_log = info->ewma_log;
- ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
+ ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est,
&est->lock, NULL, &cfg.opt);
if (ret < 0)
goto err2;
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 03f0b370e178..98a4c6d4f1cb 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -31,7 +31,7 @@
static unsigned int
redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
- return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum);
+ return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par));
}
static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -40,7 +40,13 @@ static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
if (range->flags & NF_NAT_RANGE_MAP_IPS)
return -EINVAL;
- return 0;
+
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void redirect_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
/* FIXME: Take multiple ranges --RR */
@@ -56,13 +62,13 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par)
pr_debug("bad rangesize %u.\n", mr->rangesize);
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
}
static unsigned int
redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
- return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum);
+ return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par));
}
static struct xt_target redirect_tg_reg[] __read_mostly = {
@@ -72,6 +78,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = {
.revision = 0,
.table = "nat",
.checkentry = redirect_tg6_checkentry,
+ .destroy = redirect_tg_destroy,
.target = redirect_tg6,
.targetsize = sizeof(struct nf_nat_range),
.hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -85,6 +92,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = {
.table = "nat",
.target = redirect_tg4,
.checkentry = redirect_tg4_check,
+ .destroy = redirect_tg_destroy,
.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.hooks = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_OUT),
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 872db2d0e2a9..27241a767f17 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
return -1;
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
- struct net *net = par->net;
+ struct net *net = xt_net(par);
unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu);
@@ -172,7 +172,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
* length IPv6 header of 60, ergo the default MSS value is 1220
* Since no MSS was provided, we must use the default values
*/
- if (par->family == NFPROTO_IPV4)
+ if (xt_family(par) == NFPROTO_IPV4)
newmss = min(newmss, (u16)536);
else
newmss = min(newmss, (u16)1220);
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 0471db4032c5..1c57ace75ae6 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -33,7 +33,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_tee_tginfo *info = par->targinfo;
int oif = info->priv ? info->priv->oif : 0;
- nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif);
+ nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif);
return XT_CONTINUE;
}
@@ -45,7 +45,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_tee_tginfo *info = par->targinfo;
int oif = info->priv ? info->priv->oif : 0;
- nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif);
+ nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 663c4c3c9072..80cb7babeb64 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -364,7 +364,8 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tproxy_target_info *tgi = par->targinfo;
- return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
+ return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport,
+ tgi->mark_mask, tgi->mark_value);
}
static unsigned int
@@ -372,7 +373,8 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
- return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
+ return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport,
+ tgi->mark_mask, tgi->mark_value);
}
#ifdef XT_TPROXY_HAVE_IPV6
@@ -442,7 +444,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
+ sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
@@ -485,10 +487,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
- par->in, NFT_LOOKUP_ESTABLISHED);
+ xt_in(par), NFT_LOOKUP_ESTABLISHED);
laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
lport = tgi->lport ? tgi->lport : hp->dest;
@@ -500,10 +502,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
tproto, &iph->saddr, laddr,
hp->source, lport,
- par->in, NFT_LOOKUP_LISTENER);
+ xt_in(par), NFT_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
if (sk && tproxy_sk_is_transparent(sk)) {
@@ -529,6 +531,11 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
static int tproxy_tg6_check(const struct xt_tgchk_param *par)
{
const struct ip6t_ip6 *i = par->entryinfo;
+ int err;
+
+ err = nf_defrag_ipv6_enable(par->net);
+ if (err)
+ return err;
if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) &&
!(i->invflags & IP6T_INV_PROTO))
@@ -543,6 +550,11 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
static int tproxy_tg4_check(const struct xt_tgchk_param *par)
{
const struct ipt_ip *i = par->entryinfo;
+ int err;
+
+ err = nf_defrag_ipv4_enable(par->net);
+ if (err)
+ return err;
if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
&& !(i->invflags & IPT_INV_PROTO))
@@ -594,11 +606,6 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
static int __init tproxy_tg_init(void)
{
- nf_defrag_ipv4_enable();
-#ifdef XT_TPROXY_HAVE_IPV6
- nf_defrag_ipv6_enable();
-#endif
-
return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
}
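
The TPROXY module no longer switches defragmentation on globally at module load; instead each checkentry enables it for the namespace the rule is being added to, and the enable function's error return is now propagated. A runnable userspace sketch of the lazily-enabled, per-namespace behavior:

#include <stdbool.h>
#include <stdio.h>

struct netns { bool defrag4; };

static int defrag_ipv4_enable(struct netns *net)
{
    if (!net->defrag4) {
        /* kernel: register the IPv4 defrag hooks in this netns only */
        net->defrag4 = true;
        printf("defrag enabled in this netns\n");
    }
    return 0;
}

int main(void)
{
    struct netns a = { false }, b = { false };

    defrag_ipv4_enable(&a);     /* tproxy_tg4_check() in netns a */
    printf("b untouched: defrag4=%d\n", b.defrag4);
    return 0;
}
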
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 11d6091991a4..e329dabde35f 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
static bool
addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = par->net;
+ struct net *net = xt_net(par);
const struct xt_addrtype_info *info = par->matchinfo;
const struct iphdr *iph = ip_hdr(skb);
bool ret = true;
@@ -143,19 +143,19 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
static bool
addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = par->net;
+ struct net *net = xt_net(par);
const struct xt_addrtype_info_v1 *info = par->matchinfo;
const struct iphdr *iph;
const struct net_device *dev = NULL;
bool ret = true;
if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
- dev = par->in;
+ dev = xt_in(par);
else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
- dev = par->out;
+ dev = xt_out(par);
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- if (par->family == NFPROTO_IPV6)
+ if (xt_family(par) == NFPROTO_IPV6)
return addrtype_mt6(net, dev, skb, info);
#endif
iph = ip_hdr(skb);
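
The xt_net()/xt_in()/xt_out()/xt_hooknum()/xt_family() conversions repeated throughout this diff come from moving the hook state into xt_action_param; the accessors are thin wrappers around the shared nf_hook_state, roughly:

static inline struct net *xt_net(const struct xt_action_param *par)
{
        return par->state->net;
}

static inline struct net_device *xt_in(const struct xt_action_param *par)
{
        return par->state->in;
}

static inline u_int8_t xt_family(const struct xt_action_param *par)
{
        return par->state->pf;
}

xt_out() and xt_hooknum() follow the same one-line pattern over par->state->out and par->state->hook.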
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index dffee9d47ec4..2dedaa23ab0a 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
+#include <linux/bpf.h>
#include <linux/netfilter/xt_bpf.h>
#include <linux/netfilter/x_tables.h>
@@ -20,15 +21,15 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_bpf");
MODULE_ALIAS("ip6t_bpf");
-static int bpf_mt_check(const struct xt_mtchk_param *par)
+static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
+ struct bpf_prog **ret)
{
- struct xt_bpf_info *info = par->matchinfo;
struct sock_fprog_kern program;
- program.len = info->bpf_program_num_elem;
- program.filter = info->bpf_program;
+ program.len = len;
+ program.filter = insns;
- if (bpf_prog_create(&info->filter, &program)) {
+ if (bpf_prog_create(ret, &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
}
@@ -36,6 +37,42 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
return 0;
}
+static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
+{
+ struct bpf_prog *prog;
+
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ *ret = prog;
+ return 0;
+}
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_bpf_info *info = par->matchinfo;
+
+ return __bpf_mt_check_bytecode(info->bpf_program,
+ info->bpf_program_num_elem,
+ &info->filter);
+}
+
+static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ if (info->mode == XT_BPF_MODE_BYTECODE)
+ return __bpf_mt_check_bytecode(info->bpf_program,
+ info->bpf_program_num_elem,
+ &info->filter);
+ else if (info->mode == XT_BPF_MODE_FD_PINNED ||
+ info->mode == XT_BPF_MODE_FD_ELF)
+ return __bpf_mt_check_fd(info->fd, &info->filter);
+ else
+ return -EINVAL;
+}
+
static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
@@ -43,31 +80,58 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
return BPF_PROG_RUN(info->filter, skb);
}
+static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb);
+}
+
static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
+
+ bpf_prog_destroy(info->filter);
+}
+
+static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ const struct xt_bpf_info_v1 *info = par->matchinfo;
+
bpf_prog_destroy(info->filter);
}
-static struct xt_match bpf_mt_reg __read_mostly = {
- .name = "bpf",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = bpf_mt_check,
- .match = bpf_mt,
- .destroy = bpf_mt_destroy,
- .matchsize = sizeof(struct xt_bpf_info),
- .me = THIS_MODULE,
+static struct xt_match bpf_mt_reg[] __read_mostly = {
+ {
+ .name = "bpf",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = bpf_mt_check,
+ .match = bpf_mt,
+ .destroy = bpf_mt_destroy,
+ .matchsize = sizeof(struct xt_bpf_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "bpf",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = bpf_mt_check_v1,
+ .match = bpf_mt_v1,
+ .destroy = bpf_mt_destroy_v1,
+ .matchsize = sizeof(struct xt_bpf_info_v1),
+ .me = THIS_MODULE,
+ },
};
static int __init bpf_mt_init(void)
{
- return xt_register_match(&bpf_mt_reg);
+ return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
}
static void __exit bpf_mt_exit(void)
{
- xt_unregister_match(&bpf_mt_reg);
+ xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
}
module_init(bpf_mt_init);
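
Revision 1 of the bpf match accepts an eBPF program file descriptor (XT_BPF_MODE_FD_PINNED or XT_BPF_MODE_FD_ELF) in place of classic BPF bytecode. On the userspace side, obtaining such an fd from a program pinned under /sys/fs/bpf would look roughly like this sketch (error handling trimmed; the raw bpf(2) syscall stands in for a library wrapper):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* fetch a program fd for an object pinned at @path */
static int bpf_obj_get_pinned(const char *path)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.pathname = (__u64)(unsigned long)path;
        return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}

The kernel side then only has to call bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER), as __bpf_mt_check_fd() above does.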
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 96fa26b20b67..9a9884a39c0e 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -112,7 +112,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
* know, matches should not alter packets, but we are doing this here
* because we would need to add a PKTTYPE target for this sole purpose.
*/
- if (!xt_cluster_is_multicast_addr(skb, par->family) &&
+ if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) &&
skb->pkt_type == PACKET_MULTICAST) {
pskb->pkt_type = PACKET_HOST;
}
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index d4bec261e74e..cad0b7b5eb35 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -110,7 +110,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
sinfo->direction != XT_CONNBYTES_DIR_BOTH)
return -EINVAL;
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -129,7 +129,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_match connbytes_mt_reg __read_mostly = {
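
The same mechanical conversion recurs in connlabel, connlimit, connmark, conntrack, helper, state, and the NAT targets below: the per-family l3proto module get/put becomes a per-netns get/put, paired between checkentry and destroy. Schematically, for a hypothetical match:

static int foo_mt_check(const struct xt_mtchk_param *par)
{
        /* pins conntrack for par->net instead of merely bumping
         * the l3proto module refcount */
        return nf_ct_netns_get(par->net, par->family);
}

static void foo_mt_destroy(const struct xt_mtdtor_param *par)
{
        nf_ct_netns_put(par->net, par->family);
}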
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
index 03d66f1c5e69..7827128d5a95 100644
--- a/net/netfilter/xt_connlabel.c
+++ b/net/netfilter/xt_connlabel.c
@@ -61,7 +61,7 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
}
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -70,14 +70,14 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par)
ret = nf_connlabels_get(par->net, info->bit);
if (ret < 0)
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
return ret;
}
static void connlabel_mt_destroy(const struct xt_mtdtor_param *par)
{
nf_connlabels_put(par->net);
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_match connlabels_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b6dc322593a3..2aff2b7c4689 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -317,7 +317,7 @@ static int count_them(struct net *net,
static bool
connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = par->net;
+ struct net *net = xt_net(par);
const struct xt_connlimit_info *info = par->matchinfo;
union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
@@ -332,11 +332,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
zone = nf_ct_zone(ct);
} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- par->family, net, &tuple)) {
+ xt_family(par), net, &tuple)) {
goto hotdrop;
}
- if (par->family == NFPROTO_IPV6) {
+ if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
&iph->daddr : &iph->saddr, sizeof(addr.ip6));
@@ -347,7 +347,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
connections = count_them(net, info->data, tuple_ptr, &addr,
- &info->mask, par->family, zone);
+ &info->mask, xt_family(par), zone);
if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
@@ -368,7 +368,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd));
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
pr_info("cannot load conntrack support for "
"address family %u\n", par->family);
@@ -378,7 +378,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
/* init private data */
info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
if (info->data == NULL) {
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
return -ENOMEM;
}
@@ -414,7 +414,7 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
const struct xt_connlimit_info *info = par->matchinfo;
unsigned int i;
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
destroy_tree(&info->data->climit_root4[i]);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index b83e158e116a..9935d5029b0e 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -77,7 +77,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par)
{
int ret;
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -86,7 +86,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par)
static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static bool
@@ -107,7 +107,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par)
{
int ret;
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -116,7 +116,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par)
static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_target connmark_tg_reg __read_mostly = {
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index a3b8f697cfc5..c0fb217bc649 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -200,22 +200,22 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
return false;
if (info->match_flags & XT_CONNTRACK_ORIGSRC)
- if (conntrack_mt_origsrc(ct, info, par->family) ^
+ if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^
!(info->invert_flags & XT_CONNTRACK_ORIGSRC))
return false;
if (info->match_flags & XT_CONNTRACK_ORIGDST)
- if (conntrack_mt_origdst(ct, info, par->family) ^
+ if (conntrack_mt_origdst(ct, info, xt_family(par)) ^
!(info->invert_flags & XT_CONNTRACK_ORIGDST))
return false;
if (info->match_flags & XT_CONNTRACK_REPLSRC)
- if (conntrack_mt_replsrc(ct, info, par->family) ^
+ if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^
!(info->invert_flags & XT_CONNTRACK_REPLSRC))
return false;
if (info->match_flags & XT_CONNTRACK_REPLDST)
- if (conntrack_mt_repldst(ct, info, par->family) ^
+ if (conntrack_mt_repldst(ct, info, xt_family(par)) ^
!(info->invert_flags & XT_CONNTRACK_REPLDST))
return false;
@@ -271,7 +271,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
{
int ret;
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -280,7 +280,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_match conntrack_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
index d9202cdd25c9..96ebe1cdefec 100644
--- a/net/netfilter/xt_devgroup.c
+++ b/net/netfilter/xt_devgroup.c
@@ -24,12 +24,12 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_devgroup_info *info = par->matchinfo;
if (info->flags & XT_DEVGROUP_MATCH_SRC &&
- (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^
+ (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 1 : 0) ^
((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
return false;
if (info->flags & XT_DEVGROUP_MATCH_DST &&
- (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^
+ (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^
((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
return false;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 64670fc5d0e1..236ac8008909 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_tos_match_info *info = par->matchinfo;
- if (par->family == NFPROTO_IPV4)
+ if (xt_family(par) == NFPROTO_IPV4)
return ((ip_hdr(skb)->tos & info->tos_mask) ==
info->tos_value) ^ !!info->invert;
else
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b89b688e9d01..10063408141d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -49,7 +49,7 @@ struct hashlimit_net {
struct proc_dir_entry *ip6t_hashlimit;
};
-static int hashlimit_net_id;
+static unsigned int hashlimit_net_id;
static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
{
return net_generic(net, hashlimit_net_id);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index f679dd4c272a..38a78151c0e9 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -59,7 +59,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par)
struct xt_helper_info *info = par->matchinfo;
int ret;
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -71,7 +71,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par)
static void helper_mt_destroy(const struct xt_mtdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_match helper_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 71a9d95e0a81..0fdc89064488 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -48,9 +48,9 @@ static bool
ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_ipvs_mtinfo *data = par->matchinfo;
- struct netns_ipvs *ipvs = net_ipvs(par->net);
+ struct netns_ipvs *ipvs = net_ipvs(xt_net(par));
/* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
- const u_int8_t family = par->family;
+ const u_int8_t family = xt_family(par);
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index ac1d3c3d09e7..1cde0e4985b7 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -42,29 +42,43 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
e = minfo->ports[++i];
pr_debug("src or dst matches with %d-%d?\n", s, e);
- if (minfo->flags == XT_MULTIPORT_SOURCE
- && src >= s && src <= e)
- return true ^ minfo->invert;
- if (minfo->flags == XT_MULTIPORT_DESTINATION
- && dst >= s && dst <= e)
- return true ^ minfo->invert;
- if (minfo->flags == XT_MULTIPORT_EITHER
- && ((dst >= s && dst <= e)
- || (src >= s && src <= e)))
- return true ^ minfo->invert;
+ switch (minfo->flags) {
+ case XT_MULTIPORT_SOURCE:
+ if (src >= s && src <= e)
+ return true ^ minfo->invert;
+ break;
+ case XT_MULTIPORT_DESTINATION:
+ if (dst >= s && dst <= e)
+ return true ^ minfo->invert;
+ break;
+ case XT_MULTIPORT_EITHER:
+ if ((dst >= s && dst <= e) ||
+ (src >= s && src <= e))
+ return true ^ minfo->invert;
+ break;
+ default:
+ break;
+ }
} else {
/* exact port matching */
pr_debug("src or dst matches with %d?\n", s);
- if (minfo->flags == XT_MULTIPORT_SOURCE
- && src == s)
- return true ^ minfo->invert;
- if (minfo->flags == XT_MULTIPORT_DESTINATION
- && dst == s)
- return true ^ minfo->invert;
- if (minfo->flags == XT_MULTIPORT_EITHER
- && (src == s || dst == s))
- return true ^ minfo->invert;
+ switch (minfo->flags) {
+ case XT_MULTIPORT_SOURCE:
+ if (src == s)
+ return true ^ minfo->invert;
+ break;
+ case XT_MULTIPORT_DESTINATION:
+ if (dst == s)
+ return true ^ minfo->invert;
+ break;
+ case XT_MULTIPORT_EITHER:
+ if (src == s || dst == s)
+ return true ^ minfo->invert;
+ break;
+ default:
+ break;
+ }
}
}
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index bea7464cc43f..8107b3eb865f 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -23,7 +23,17 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
par->target->name);
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static int xt_nat_checkentry(const struct xt_tgchk_param *par)
+{
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void xt_nat_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
static void xt_nat_convert_range(struct nf_nat_range *dst,
@@ -106,6 +116,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
.name = "SNAT",
.revision = 0,
.checkentry = xt_nat_checkentry_v0,
+ .destroy = xt_nat_destroy,
.target = xt_snat_target_v0,
.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.family = NFPROTO_IPV4,
@@ -118,6 +129,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
.name = "DNAT",
.revision = 0,
.checkentry = xt_nat_checkentry_v0,
+ .destroy = xt_nat_destroy,
.target = xt_dnat_target_v0,
.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.family = NFPROTO_IPV4,
@@ -129,6 +141,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
{
.name = "SNAT",
.revision = 1,
+ .checkentry = xt_nat_checkentry,
+ .destroy = xt_nat_destroy,
.target = xt_snat_target_v1,
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
@@ -139,6 +153,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
{
.name = "DNAT",
.revision = 1,
+ .checkentry = xt_nat_checkentry,
+ .destroy = xt_nat_destroy,
.target = xt_dnat_target_v1,
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index cf327593852a..cc0518fe598e 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
nfnl_acct_update(skb, info->nfacct);
- overquota = nfnl_acct_overquota(par->net, skb, info->nfacct);
+ overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct);
return overquota == NFACCT_UNDERQUOTA ? false : true;
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 2455b69b5810..c05fefcec238 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -201,7 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
unsigned char opts[MAX_IPOPTLEN];
const struct xt_osf_finger *kf;
const struct xt_osf_user_finger *f;
- struct net *net = p->net;
+ struct net *net = xt_net(p);
if (!info)
return false;
@@ -326,8 +326,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
fcount++;
if (info->flags & XT_OSF_LOG)
- nf_log_packet(net, p->family, p->hooknum, skb,
- p->in, p->out, NULL,
+ nf_log_packet(net, xt_family(p), xt_hooknum(p), skb,
+ xt_in(p), xt_out(p), NULL,
"%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
f->genre, f->version, f->subtype,
&ip->saddr, ntohs(tcp->source),
@@ -341,8 +341,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
- nf_log_packet(net, p->family, p->hooknum, skb, p->in,
- p->out, NULL,
+ nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
+ xt_out(p), NULL,
"Remote OS is not known: %pI4:%u -> %pI4:%u\n",
&ip->saddr, ntohs(tcp->source),
&ip->daddr, ntohs(tcp->dest));
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index a20e731b5b6c..16477df45b3b 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -63,7 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_owner_match_info *info = par->matchinfo;
const struct file *filp;
struct sock *sk = skb_to_full_sk(skb);
- struct net *net = par->net;
+ struct net *net = xt_net(par);
if (sk == NULL || sk->sk_socket == NULL)
return (info->match ^ info->invert) == 0;
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 5b645cb598fc..57efb703ff18 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -30,10 +30,10 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (skb->pkt_type != PACKET_LOOPBACK)
type = skb->pkt_type;
- else if (par->family == NFPROTO_IPV4 &&
+ else if (xt_family(par) == NFPROTO_IPV4 &&
ipv4_is_multicast(ip_hdr(skb)->daddr))
type = PACKET_MULTICAST;
- else if (par->family == NFPROTO_IPV6 &&
+ else if (xt_family(par) == NFPROTO_IPV6 &&
ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
type = PACKET_MULTICAST;
else
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index f23e97bb42d7..2b4ab189bba7 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
if (info->flags & XT_POLICY_MATCH_IN)
- ret = match_policy_in(skb, info, par->family);
+ ret = match_policy_in(skb, info, xt_family(par));
else
- ret = match_policy_out(skb, info, par->family);
+ ret = match_policy_out(skb, info, xt_family(par));
if (ret < 0)
ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 7720b036d76a..1db02f6fca54 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -18,35 +18,33 @@ static bool
xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_rateest_match_info *info = par->matchinfo;
- struct gnet_stats_rate_est64 *r;
+ struct gnet_stats_rate_est64 sample = {0};
u_int32_t bps1, bps2, pps1, pps2;
bool ret = true;
- spin_lock_bh(&info->est1->lock);
- r = &info->est1->rstats;
+ gen_estimator_read(&info->est1->rate_est, &sample);
+
if (info->flags & XT_RATEEST_MATCH_DELTA) {
- bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0;
- pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0;
+ bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0;
+ pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0;
} else {
- bps1 = r->bps;
- pps1 = r->pps;
+ bps1 = sample.bps;
+ pps1 = sample.pps;
}
- spin_unlock_bh(&info->est1->lock);
if (info->flags & XT_RATEEST_MATCH_ABS) {
bps2 = info->bps2;
pps2 = info->pps2;
} else {
- spin_lock_bh(&info->est2->lock);
- r = &info->est2->rstats;
+ gen_estimator_read(&info->est2->rate_est, &sample);
+
if (info->flags & XT_RATEEST_MATCH_DELTA) {
- bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0;
- pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0;
+ bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0;
+ pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0;
} else {
- bps2 = r->bps;
- pps2 = r->pps;
+ bps2 = sample.bps;
+ pps2 = sample.pps;
}
- spin_unlock_bh(&info->est2->lock);
}
switch (info->mode) {
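
The estimator rewrite replaces the spinlock-protected rstats copy with a lockless RCU read; a consumer now samples into an on-stack struct, with no lock held and no pointer into live estimator state. Sketch (est stands in for info->est1/est2):

struct gnet_stats_rate_est64 sample = {0};

gen_estimator_read(&est->rate_est, &sample);
/* sample.bps and sample.pps form a coherent snapshot */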
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index e3b7a09b103e..1d89a4eaf841 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -95,7 +95,7 @@ struct recent_net {
#endif
};
-static int recent_net_id __read_mostly;
+static unsigned int recent_net_id __read_mostly;
static inline struct recent_net *recent_pernet(struct net *net)
{
@@ -236,7 +236,7 @@ static void recent_table_flush(struct recent_table *t)
static bool
recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = par->net;
+ struct net *net = xt_net(par);
struct recent_net *recent_net = recent_pernet(net);
const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
struct recent_table *t;
@@ -245,7 +245,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
u_int8_t ttl;
bool ret = info->invert;
- if (par->family == NFPROTO_IPV4) {
+ if (xt_family(par) == NFPROTO_IPV4) {
const struct iphdr *iph = ip_hdr(skb);
if (info->side == XT_RECENT_DEST)
@@ -266,7 +266,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
/* use TTL as seen before forwarding */
- if (par->out != NULL && skb->sk == NULL)
+ if (xt_out(par) != NULL && skb->sk == NULL)
ttl++;
spin_lock_bh(&recent_lock);
@@ -274,12 +274,12 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
nf_inet_addr_mask(&addr, &addr_mask, &t->mask);
- e = recent_entry_lookup(t, &addr_mask, par->family,
+ e = recent_entry_lookup(t, &addr_mask, xt_family(par),
(info->check_set & XT_RECENT_TTL) ? ttl : 0);
if (e == NULL) {
if (!(info->check_set & XT_RECENT_SET))
goto out;
- e = recent_entry_init(t, &addr_mask, par->family, ttl);
+ e = recent_entry_init(t, &addr_mask, xt_family(par), ttl);
if (e == NULL)
par->hotdrop = true;
ret = !ret;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 5669e5b453f4..64285702afd5 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -55,7 +55,7 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v0 *info = par->matchinfo;
- ADT_OPT(opt, par->family, info->match_set.u.compat.dim,
+ ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim,
info->match_set.u.compat.flags, 0, UINT_MAX);
return match_set(info->match_set.index, skb, par, &opt,
@@ -118,7 +118,7 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v1 *info = par->matchinfo;
- ADT_OPT(opt, par->family, info->match_set.dim,
+ ADT_OPT(opt, xt_family(par), info->match_set.dim,
info->match_set.flags, 0, UINT_MAX);
if (opt.flags & IPSET_RETURN_NOMATCH)
@@ -184,7 +184,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_set_info_match_v3 *info = par->matchinfo;
int ret;
- ADT_OPT(opt, par->family, info->match_set.dim,
+ ADT_OPT(opt, xt_family(par), info->match_set.dim,
info->match_set.flags, info->flags, UINT_MAX);
if (info->packets.op != IPSET_COUNTER_NONE ||
@@ -231,7 +231,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_set_info_match_v4 *info = par->matchinfo;
int ret;
- ADT_OPT(opt, par->family, info->match_set.dim,
+ ADT_OPT(opt, xt_family(par), info->match_set.dim,
info->match_set.flags, info->flags, UINT_MAX);
if (info->packets.op != IPSET_COUNTER_NONE ||
@@ -259,9 +259,9 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v0 *info = par->targinfo;
- ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim,
+ ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim,
info->add_set.u.compat.flags, 0, UINT_MAX);
- ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim,
+ ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim,
info->del_set.u.compat.flags, 0, UINT_MAX);
if (info->add_set.index != IPSET_INVALID_ID)
@@ -332,9 +332,9 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v1 *info = par->targinfo;
- ADT_OPT(add_opt, par->family, info->add_set.dim,
+ ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
info->add_set.flags, 0, UINT_MAX);
- ADT_OPT(del_opt, par->family, info->del_set.dim,
+ ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
info->del_set.flags, 0, UINT_MAX);
if (info->add_set.index != IPSET_INVALID_ID)
@@ -401,9 +401,9 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v2 *info = par->targinfo;
- ADT_OPT(add_opt, par->family, info->add_set.dim,
+ ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
info->add_set.flags, info->flags, info->timeout);
- ADT_OPT(del_opt, par->family, info->del_set.dim,
+ ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
info->del_set.flags, 0, UINT_MAX);
/* Normalize to fit into jiffies */
@@ -423,17 +423,19 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
/* Revision 3 target */
+#define MOPT(opt, member) ((opt).ext.skbinfo.member)
+
static unsigned int
set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v3 *info = par->targinfo;
int ret;
- ADT_OPT(add_opt, par->family, info->add_set.dim,
+ ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
info->add_set.flags, info->flags, info->timeout);
- ADT_OPT(del_opt, par->family, info->del_set.dim,
+ ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
info->del_set.flags, 0, UINT_MAX);
- ADT_OPT(map_opt, par->family, info->map_set.dim,
+ ADT_OPT(map_opt, xt_family(par), info->map_set.dim,
info->map_set.flags, 0, UINT_MAX);
/* Normalize to fit into jiffies */
@@ -453,14 +455,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
if (!ret)
return XT_CONTINUE;
if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
- skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
- ^ (map_opt.ext.skbmark);
+ skb->mark = (skb->mark & ~MOPT(map_opt, skbmarkmask))
+ ^ MOPT(map_opt, skbmark);
if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
- skb->priority = map_opt.ext.skbprio;
+ skb->priority = MOPT(map_opt, skbprio);
if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
skb->dev &&
- skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
- skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+ skb->dev->real_num_tx_queues > MOPT(map_opt, skbqueue))
+ skb_set_queue_mapping(skb, MOPT(map_opt, skbqueue));
}
return XT_CONTINUE;
}
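
MOPT() is purely notational, shortening the repeated .ext.skbinfo. path; the skbmark assignment above expands to:

skb->mark = (skb->mark & ~map_opt.ext.skbinfo.skbmarkmask)
            ^ map_opt.ext.skbinfo.skbmark;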
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index b10ade272b50..770bbec878f1 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,76 +22,14 @@
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-#define XT_SOCKET_HAVE_IPV6 1
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/inet6_hashtables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
+#include <net/netfilter/nf_socket.h>
#include <linux/netfilter/xt_socket.h>
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-#define XT_SOCKET_HAVE_CONNTRACK 1
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
-static int
-extract_icmp4_fields(const struct sk_buff *skb,
- u8 *protocol,
- __be32 *raddr,
- __be32 *laddr,
- __be16 *rport,
- __be16 *lport)
-{
- unsigned int outside_hdrlen = ip_hdrlen(skb);
- struct iphdr *inside_iph, _inside_iph;
- struct icmphdr *icmph, _icmph;
- __be16 *ports, _ports[2];
-
- icmph = skb_header_pointer(skb, outside_hdrlen,
- sizeof(_icmph), &_icmph);
- if (icmph == NULL)
- return 1;
-
- switch (icmph->type) {
- case ICMP_DEST_UNREACH:
- case ICMP_SOURCE_QUENCH:
- case ICMP_REDIRECT:
- case ICMP_TIME_EXCEEDED:
- case ICMP_PARAMETERPROB:
- break;
- default:
- return 1;
- }
-
- inside_iph = skb_header_pointer(skb, outside_hdrlen +
- sizeof(struct icmphdr),
- sizeof(_inside_iph), &_inside_iph);
- if (inside_iph == NULL)
- return 1;
-
- if (inside_iph->protocol != IPPROTO_TCP &&
- inside_iph->protocol != IPPROTO_UDP)
- return 1;
-
- ports = skb_header_pointer(skb, outside_hdrlen +
- sizeof(struct icmphdr) +
- (inside_iph->ihl << 2),
- sizeof(_ports), &_ports);
- if (ports == NULL)
- return 1;
-
- /* the inside IP packet is the one quoted from our side, thus
- * its saddr is the local address */
- *protocol = inside_iph->protocol;
- *laddr = inside_iph->saddr;
- *lport = ports[0];
- *raddr = inside_iph->daddr;
- *rport = ports[1];
-
- return 0;
-}
-
/* "socket" match based redirection (no specific rule)
* ===================================================
*
@@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb,
* then local services could intercept traffic going through the
* box.
*/
-static struct sock *
-xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
- const u8 protocol,
- const __be32 saddr, const __be32 daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in)
-{
- switch (protocol) {
- case IPPROTO_TCP:
- return inet_lookup(net, &tcp_hashinfo, skb, doff,
- saddr, sport, daddr, dport,
- in->ifindex);
- case IPPROTO_UDP:
- return udp4_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- }
- return NULL;
-}
-
-static bool xt_socket_sk_is_transparent(struct sock *sk)
-{
- switch (sk->sk_state) {
- case TCP_TIME_WAIT:
- return inet_twsk(sk)->tw_transparent;
-
- case TCP_NEW_SYN_RECV:
- return inet_rsk(inet_reqsk(sk))->no_srccheck;
-
- default:
- return inet_sk(sk)->transparent;
- }
-}
-
-static struct sock *xt_socket_lookup_slow_v4(struct net *net,
- const struct sk_buff *skb,
- const struct net_device *indev)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct sk_buff *data_skb = NULL;
- int doff = 0;
- __be32 uninitialized_var(daddr), uninitialized_var(saddr);
- __be16 uninitialized_var(dport), uninitialized_var(sport);
- u8 uninitialized_var(protocol);
-#ifdef XT_SOCKET_HAVE_CONNTRACK
- struct nf_conn const *ct;
- enum ip_conntrack_info ctinfo;
-#endif
-
- if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
- struct udphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, ip_hdrlen(skb),
- sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return NULL;
-
- protocol = iph->protocol;
- saddr = iph->saddr;
- sport = hp->source;
- daddr = iph->daddr;
- dport = hp->dest;
- data_skb = (struct sk_buff *)skb;
- doff = iph->protocol == IPPROTO_TCP ?
- ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
- ip_hdrlen(skb) + sizeof(*hp);
-
- } else if (iph->protocol == IPPROTO_ICMP) {
- if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
- &sport, &dport))
- return NULL;
- } else {
- return NULL;
- }
-
-#ifdef XT_SOCKET_HAVE_CONNTRACK
- /* Do the lookup with the original socket address in
- * case this is a reply packet of an established
- * SNAT-ted connection.
- */
- ct = nf_ct_get(skb, &ctinfo);
- if (ct && !nf_ct_is_untracked(ct) &&
- ((iph->protocol != IPPROTO_ICMP &&
- ctinfo == IP_CT_ESTABLISHED_REPLY) ||
- (iph->protocol == IPPROTO_ICMP &&
- ctinfo == IP_CT_RELATED_REPLY)) &&
- (ct->status & IPS_SRC_NAT_DONE)) {
-
- daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
- dport = (iph->protocol == IPPROTO_TCP) ?
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
- }
-#endif
-
- return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
- daddr, sport, dport, indev);
-}
-
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info)
@@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
struct sock *sk = skb->sk;
if (!sk)
- sk = xt_socket_lookup_slow_v4(par->net, skb, par->in);
+ sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
if (sk) {
bool wildcard;
bool transparent = true;
@@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* if XT_SOCKET_TRANSPARENT is used
*/
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = xt_socket_sk_is_transparent(sk);
+ transparent = nf_sk_is_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent)
@@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
return socket_match(skb, par, par->matchinfo);
}
-#ifdef XT_SOCKET_HAVE_IPV6
-
-static int
-extract_icmp6_fields(const struct sk_buff *skb,
- unsigned int outside_hdrlen,
- int *protocol,
- const struct in6_addr **raddr,
- const struct in6_addr **laddr,
- __be16 *rport,
- __be16 *lport,
- struct ipv6hdr *ipv6_var)
-{
- const struct ipv6hdr *inside_iph;
- struct icmp6hdr *icmph, _icmph;
- __be16 *ports, _ports[2];
- u8 inside_nexthdr;
- __be16 inside_fragoff;
- int inside_hdrlen;
-
- icmph = skb_header_pointer(skb, outside_hdrlen,
- sizeof(_icmph), &_icmph);
- if (icmph == NULL)
- return 1;
-
- if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
- return 1;
-
- inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
- sizeof(*ipv6_var), ipv6_var);
- if (inside_iph == NULL)
- return 1;
- inside_nexthdr = inside_iph->nexthdr;
-
- inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
- sizeof(*ipv6_var),
- &inside_nexthdr, &inside_fragoff);
- if (inside_hdrlen < 0)
- return 1; /* hjm: Packet has no/incomplete transport layer headers. */
-
- if (inside_nexthdr != IPPROTO_TCP &&
- inside_nexthdr != IPPROTO_UDP)
- return 1;
-
- ports = skb_header_pointer(skb, inside_hdrlen,
- sizeof(_ports), &_ports);
- if (ports == NULL)
- return 1;
-
- /* the inside IP packet is the one quoted from our side, thus
- * its saddr is the local address */
- *protocol = inside_nexthdr;
- *laddr = &inside_iph->saddr;
- *lport = ports[0];
- *raddr = &inside_iph->daddr;
- *rport = ports[1];
-
- return 0;
-}
-
-static struct sock *
-xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
- const u8 protocol,
- const struct in6_addr *saddr, const struct in6_addr *daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in)
-{
- switch (protocol) {
- case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo, skb, doff,
- saddr, sport, daddr, dport,
- in->ifindex);
- case IPPROTO_UDP:
- return udp6_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- }
-
- return NULL;
-}
-
-static struct sock *xt_socket_lookup_slow_v6(struct net *net,
- const struct sk_buff *skb,
- const struct net_device *indev)
-{
- __be16 uninitialized_var(dport), uninitialized_var(sport);
- const struct in6_addr *daddr = NULL, *saddr = NULL;
- struct ipv6hdr *iph = ipv6_hdr(skb);
- struct sk_buff *data_skb = NULL;
- int doff = 0;
- int thoff = 0, tproto;
-
- tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
- if (tproto < 0) {
- pr_debug("unable to find transport header in IPv6 packet, dropping\n");
- return NULL;
- }
-
- if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
- struct udphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return NULL;
-
- saddr = &iph->saddr;
- sport = hp->source;
- daddr = &iph->daddr;
- dport = hp->dest;
- data_skb = (struct sk_buff *)skb;
- doff = tproto == IPPROTO_TCP ?
- thoff + __tcp_hdrlen((struct tcphdr *)hp) :
- thoff + sizeof(*hp);
-
- } else if (tproto == IPPROTO_ICMPV6) {
- struct ipv6hdr ipv6_var;
-
- if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
- &sport, &dport, &ipv6_var))
- return NULL;
- } else {
- return NULL;
- }
-
- return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
- sport, dport, indev);
-}
-
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static bool
socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
struct sock *sk = skb->sk;
if (!sk)
- sk = xt_socket_lookup_slow_v6(par->net, skb, par->in);
+ sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
if (sk) {
bool wildcard;
bool transparent = true;
@@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
* if XT_SOCKET_TRANSPARENT is used
*/
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = xt_socket_sk_is_transparent(sk);
+ transparent = nf_sk_is_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent)
@@ -432,9 +147,28 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
}
#endif
+static int socket_mt_enable_defrag(struct net *net, int family)
+{
+ switch (family) {
+ case NFPROTO_IPV4:
+ return nf_defrag_ipv4_enable(net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ case NFPROTO_IPV6:
+ return nf_defrag_ipv6_enable(net);
+#endif
+ }
+ WARN_ONCE(1, "Unknown family %d\n", family);
+ return 0;
+}
+
static int socket_mt_v1_check(const struct xt_mtchk_param *par)
{
const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ int err;
+
+ err = socket_mt_enable_defrag(par->net, par->family);
+ if (err)
+ return err;
if (info->flags & ~XT_SOCKET_FLAGS_V1) {
pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1);
@@ -446,6 +180,11 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par)
static int socket_mt_v2_check(const struct xt_mtchk_param *par)
{
const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo;
+ int err;
+
+ err = socket_mt_enable_defrag(par->net, par->family);
+ if (err)
+ return err;
if (info->flags & ~XT_SOCKET_FLAGS_V2) {
pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2);
@@ -458,7 +197,11 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par)
{
const struct xt_socket_mtinfo3 *info =
(struct xt_socket_mtinfo3 *)par->matchinfo;
+ int err;
+ err = socket_mt_enable_defrag(par->net, par->family);
+ if (err)
+ return err;
if (info->flags & ~XT_SOCKET_FLAGS_V3) {
pr_info("unknown flags 0x%x\n",
info->flags & ~XT_SOCKET_FLAGS_V3);
@@ -488,7 +231,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "socket",
.revision = 1,
@@ -512,7 +255,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "socket",
.revision = 2,
@@ -536,7 +279,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "socket",
.revision = 3,
@@ -553,11 +296,6 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
static int __init socket_mt_init(void)
{
- nf_defrag_ipv4_enable();
-#ifdef XT_SOCKET_HAVE_IPV6
- nf_defrag_ipv6_enable();
-#endif
-
return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
}
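
With the slow-path lookup moved into the shared nf_socket_ipv{4,6} helpers, any netfilter code can resolve a packet's owning socket the way this match now does. Sketch of the IPv4 calling pattern, mirroring socket_match() above (net and indev come from xt_net()/xt_in() or the hook state):

struct sock *sk = skb->sk;

if (!sk)
        sk = nf_sk_lookup_slow_v4(net, skb, indev);
if (sk) {
        /* ... inspect sk ... */
        if (sk != skb->sk)
                sock_gen_put(sk);       /* drop only a looked-up ref */
}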
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index a507922d80cd..5746a33789a5 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -43,7 +43,7 @@ static int state_mt_check(const struct xt_mtchk_param *par)
{
int ret;
- ret = nf_ct_l3proto_try_module_get(par->family);
+ ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
@@ -52,7 +52,7 @@ static int state_mt_check(const struct xt_mtchk_param *par)
static void state_mt_destroy(const struct xt_mtdtor_param *par)
{
- nf_ct_l3proto_module_put(par->family);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_match state_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 0ae55a36f492..1b01eec1fbda 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -168,7 +168,7 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
* may happen that the same packet matches both rules if
* it arrived at the right moment before 13:00.
*/
- if (skb->tstamp.tv64 == 0)
+ if (skb->tstamp == 0)
__net_timestamp((struct sk_buff *)skb);
stamp = ktime_to_ns(skb->tstamp);
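
This hunk depends on skb->tstamp having become a bare ktime_t (a scalar s64) rather than a union with a .tv64 member, so timestamps compare and divide like ordinary integers. The seconds computation that follows in time_mt() reduces to roughly:

s64 stamp = ktime_to_ns(skb->tstamp);   /* now effectively the raw s64 */
stamp = div_s64(stamp, NSEC_PER_SEC);   /* nanoseconds to seconds */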
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index 2ec93c5e77bb..d177dd066504 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg {
};
/* NetLabel Generic NETLINK CALIPSO family */
-static struct genl_family netlbl_calipso_gnl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NETLBL_NLTYPE_CALIPSO_NAME,
- .version = NETLBL_PROTO_VERSION,
- .maxattr = NLBL_CALIPSO_A_MAX,
-};
+static struct genl_family netlbl_calipso_gnl_family;
/* NetLabel Netlink attribute policy */
static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = {
@@ -355,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = {
},
};
+static struct genl_family netlbl_calipso_gnl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_CALIPSO_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_CALIPSO_A_MAX,
+ .module = THIS_MODULE,
+ .ops = netlbl_calipso_ops,
+ .n_ops = ARRAY_SIZE(netlbl_calipso_ops),
+};
+
/* NetLabel Generic NETLINK Protocol Functions
*/
@@ -368,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = {
*/
int __init netlbl_calipso_genl_init(void)
{
- return genl_register_family_with_ops(&netlbl_calipso_gnl_family,
- netlbl_calipso_ops);
+ return genl_register_family(&netlbl_calipso_gnl_family);
}
static const struct netlbl_calipso_ops *calipso_ops;
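
All four NetLabel families in this diff follow the same new registration shape: the genl_family is defined after its ops array, carries .module/.ops/.n_ops itself, and no longer requests GENL_ID_GENERATE; the core always allocates the id now. Schematically, for a hypothetical family (FOO_A_MAX and the ops are placeholders):

static const struct genl_ops foo_genl_ops[] = {
        /* ... */
};

static struct genl_family foo_gnl_family __ro_after_init = {
        .hdrsize        = 0,
        .name           = "foo",
        .version        = 1,
        .maxattr        = FOO_A_MAX,
        .module         = THIS_MODULE,
        .ops            = foo_genl_ops,
        .n_ops          = ARRAY_SIZE(foo_genl_ops),
};

int __init foo_genl_init(void)
{
        return genl_register_family(&foo_gnl_family);
}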
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 7fd1104ba900..4149d3e63589 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -59,14 +59,7 @@ struct netlbl_domhsh_walk_arg {
};
/* NetLabel Generic NETLINK CIPSOv4 family */
-static struct genl_family netlbl_cipsov4_gnl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NETLBL_NLTYPE_CIPSOV4_NAME,
- .version = NETLBL_PROTO_VERSION,
- .maxattr = NLBL_CIPSOV4_A_MAX,
-};
-
+static struct genl_family netlbl_cipsov4_gnl_family;
/* NetLabel Netlink attribute policy */
static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
[NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
@@ -767,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
},
};
+static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_CIPSOV4_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_CIPSOV4_A_MAX,
+ .module = THIS_MODULE,
+ .ops = netlbl_cipsov4_ops,
+ .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops),
+};
+
/*
* NetLabel Generic NETLINK Protocol Functions
*/
@@ -781,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
*/
int __init netlbl_cipsov4_genl_init(void)
{
- return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family,
- netlbl_cipsov4_ops);
+ return genl_register_family(&netlbl_cipsov4_gnl_family);
}
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 28c56b95fb7f..ea7c67050792 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -1502,10 +1502,7 @@ static int __init netlbl_init(void)
printk(KERN_INFO "NetLabel: Initializing\n");
printk(KERN_INFO "NetLabel: domain hash size = %u\n",
(1 << NETLBL_DOMHSH_BITSIZE));
- printk(KERN_INFO "NetLabel: protocols ="
- " UNLABELED"
- " CIPSOv4"
- "\n");
+ printk(KERN_INFO "NetLabel: protocols = UNLABELED CIPSOv4 CALIPSO\n");
ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE);
if (ret_val != 0)
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index f85d0e07af2d..21e0095b1d14 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg {
};
/* NetLabel Generic NETLINK CIPSOv4 family */
-static struct genl_family netlbl_mgmt_gnl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NETLBL_NLTYPE_MGMT_NAME,
- .version = NETLBL_PROTO_VERSION,
- .maxattr = NLBL_MGMT_A_MAX,
-};
+static struct genl_family netlbl_mgmt_gnl_family;
/* NetLabel Netlink attribute policy */
static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
@@ -834,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
},
};
+static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_MGMT_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_MGMT_A_MAX,
+ .module = THIS_MODULE,
+ .ops = netlbl_mgmt_genl_ops,
+ .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops),
+};
+
/*
* NetLabel Generic NETLINK Protocol Functions
*/
@@ -848,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
*/
int __init netlbl_mgmt_genl_init(void)
{
- return genl_register_family_with_ops(&netlbl_mgmt_gnl_family,
- netlbl_mgmt_genl_ops);
+ return genl_register_family(&netlbl_mgmt_gnl_family);
}
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 4528cff9138b..22dc1b9d6362 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -123,13 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def;
static u8 netlabel_unlabel_acceptflg;
/* NetLabel Generic NETLINK unlabeled family */
-static struct genl_family netlbl_unlabel_gnl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NETLBL_NLTYPE_UNLABELED_NAME,
- .version = NETLBL_PROTO_VERSION,
- .maxattr = NLBL_UNLABEL_A_MAX,
-};
+static struct genl_family netlbl_unlabel_gnl_family;
/* NetLabel Netlink attribute policy */
static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
@@ -1378,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
},
};
+static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_UNLABELED_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_UNLABEL_A_MAX,
+ .module = THIS_MODULE,
+ .ops = netlbl_unlabel_genl_ops,
+ .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops),
+};
+
/*
* NetLabel Generic NETLINK Protocol Functions
*/
@@ -1392,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
*/
int __init netlbl_unlabel_genl_init(void)
{
- return genl_register_family_with_ops(&netlbl_unlabel_gnl_family,
- netlbl_unlabel_genl_ops);
+ return genl_register_family(&netlbl_unlabel_gnl_family);
}
/*
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 246f29d365c0..161b628ab2b0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -40,7 +40,7 @@
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
@@ -113,7 +113,7 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
-static ATOMIC_NOTIFIER_HEAD(netlink_chain);
+static BLOCKING_NOTIFIER_HEAD(netlink_chain);
static DEFINE_SPINLOCK(netlink_tap_lock);
static struct list_head netlink_tap_all __read_mostly;
@@ -711,7 +711,7 @@ static int netlink_release(struct socket *sock)
.protocol = sk->sk_protocol,
.portid = nlk->portid,
};
- atomic_notifier_call_chain(&netlink_chain,
+ blocking_notifier_call_chain(&netlink_chain,
NETLINK_URELEASE, &n);
}
@@ -2504,13 +2504,13 @@ static const struct file_operations netlink_seq_fops = {
int netlink_register_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_register(&netlink_chain, nb);
+ return blocking_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);
int netlink_unregister_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&netlink_chain, nb);
+ return blocking_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);
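
Since netlink_chain is now a blocking notifier, NETLINK_URELEASE handlers run in process context and may sleep; registration is unchanged for users. A minimal sketch of a listener:

static int my_netlink_event(struct notifier_block *nb,
                            unsigned long event, void *ptr)
{
        struct netlink_notify *n = ptr;

        if (event == NETLINK_URELEASE)
                /* taking mutexes etc. is legal here now */
                pr_debug("portid %u released protocol %d\n",
                         n->portid, n->protocol);
        return NOTIFY_DONE;
}

static struct notifier_block my_nb = {
        .notifier_call = my_netlink_event,
};
/* registered via netlink_register_notifier(&my_nb) */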
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 49c28e8ef01b..fb6e10fdb217 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -17,6 +17,7 @@
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/rwsem.h>
+#include <linux/idr.h>
#include <net/sock.h>
#include <net/genetlink.h>
@@ -58,10 +59,8 @@ static void genl_unlock_all(void)
up_write(&cb_lock);
}
-#define GENL_FAM_TAB_SIZE 16
-#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
+static DEFINE_IDR(genl_fam_idr);
-static struct list_head family_ht[GENL_FAM_TAB_SIZE];
/*
* Bitmap of multicast groups that are currently in use.
*
@@ -86,45 +85,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) |
static unsigned long *mc_groups = &mc_group_start;
static unsigned long mc_groups_longs = 1;
-static int genl_ctrl_event(int event, struct genl_family *family,
+static int genl_ctrl_event(int event, const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id);
-static inline unsigned int genl_family_hash(unsigned int id)
+static const struct genl_family *genl_family_find_byid(unsigned int id)
{
- return id & GENL_FAM_TAB_MASK;
+ return idr_find(&genl_fam_idr, id);
}
-static inline struct list_head *genl_family_chain(unsigned int id)
+static const struct genl_family *genl_family_find_byname(char *name)
{
- return &family_ht[genl_family_hash(id)];
-}
-
-static struct genl_family *genl_family_find_byid(unsigned int id)
-{
- struct genl_family *f;
-
- list_for_each_entry(f, genl_family_chain(id), family_list)
- if (f->id == id)
- return f;
-
- return NULL;
-}
-
-static struct genl_family *genl_family_find_byname(char *name)
-{
- struct genl_family *f;
- int i;
+ const struct genl_family *family;
+ unsigned int id;
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
- list_for_each_entry(f, genl_family_chain(i), family_list)
- if (strcmp(f->name, name) == 0)
- return f;
+ idr_for_each_entry(&genl_fam_idr, family, id)
+ if (strcmp(family->name, name) == 0)
+ return family;
return NULL;
}
-static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
+static const struct genl_ops *genl_get_cmd(u8 cmd,
+ const struct genl_family *family)
{
int i;
@@ -135,26 +118,6 @@ static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
return NULL;
}
-/* Of course we are going to have problems once we hit
- * 2^16 alive types, but that can only happen by year 2K
-*/
-static u16 genl_generate_id(void)
-{
- static u16 id_gen_idx = GENL_MIN_ID;
- int i;
-
- for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) {
- if (id_gen_idx != GENL_ID_VFS_DQUOT &&
- id_gen_idx != GENL_ID_PMCRAID &&
- !genl_family_find_byid(id_gen_idx))
- return id_gen_idx;
- if (++id_gen_idx > GENL_MAX_ID)
- id_gen_idx = GENL_MIN_ID;
- }
-
- return 0;
-}
-
static int genl_allocate_reserve_groups(int n_groups, int *first_id)
{
unsigned long *new_groups;
@@ -295,7 +258,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family)
return err;
}
-static void genl_unregister_mc_groups(struct genl_family *family)
+static void genl_unregister_mc_groups(const struct genl_family *family)
{
struct net *net;
int i;
@@ -344,28 +307,21 @@ static int genl_validate_ops(const struct genl_family *family)
}
/**
- * __genl_register_family - register a generic netlink family
+ * genl_register_family - register a generic netlink family
* @family: generic netlink family
*
* Registers the specified family after validating it first. Only one
* family may be registered with the same family name or identifier.
- * The family id may equal GENL_ID_GENERATE causing an unique id to
- * be automatically generated and assigned.
*
- * The family's ops array must already be assigned, you can use the
- * genl_register_family_with_ops() helper function.
+ * The family's ops, multicast groups and module pointer must already
+ * be assigned.
*
* Return 0 on success or a negative error code.
*/
-int __genl_register_family(struct genl_family *family)
+int genl_register_family(struct genl_family *family)
{
- int err = -EINVAL, i;
-
- if (family->id && family->id < GENL_MIN_ID)
- goto errout;
-
- if (family->id > GENL_MAX_ID)
- goto errout;
+ int err, i;
+ int start = GENL_START_ALLOC, end = GENL_MAX_ID;
err = genl_validate_ops(family);
if (err)
@@ -378,18 +334,20 @@ int __genl_register_family(struct genl_family *family)
goto errout_locked;
}
- if (family->id == GENL_ID_GENERATE) {
- u16 newid = genl_generate_id();
-
- if (!newid) {
- err = -ENOMEM;
- goto errout_locked;
- }
-
- family->id = newid;
- } else if (genl_family_find_byid(family->id)) {
- err = -EEXIST;
- goto errout_locked;
+ /*
+ * Sadly, a few cases need to be special-cased
+ * due to them having previously abused the API
+ * and having used their family ID also as their
+ * multicast group ID, so we use reserved IDs
+ * for both to be sure we can do that mapping.
+ */
+ if (family == &genl_ctrl) {
+ /* and this needs to be special for initial family lookups */
+ start = end = GENL_ID_CTRL;
+ } else if (strcmp(family->name, "pmcraid") == 0) {
+ start = end = GENL_ID_PMCRAID;
+ } else if (strcmp(family->name, "VFS_DQUOT") == 0) {
+ start = end = GENL_ID_VFS_DQUOT;
}
if (family->maxattr && !family->parallel_ops) {
@@ -402,11 +360,17 @@ int __genl_register_family(struct genl_family *family)
} else
family->attrbuf = NULL;
+ family->id = idr_alloc(&genl_fam_idr, family,
+ start, end + 1, GFP_KERNEL);
+ if (family->id < 0) {
+ err = family->id;
+ goto errout_locked;
+ }
+
err = genl_validate_assign_mc_groups(family);
if (err)
- goto errout_free;
+ goto errout_remove;
- list_add_tail(&family->family_list, genl_family_chain(family->id));
genl_unlock_all();
/* send all events */
@@ -417,14 +381,14 @@ int __genl_register_family(struct genl_family *family)
return 0;
-errout_free:
+errout_remove:
+ idr_remove(&genl_fam_idr, family->id);
kfree(family->attrbuf);
errout_locked:
genl_unlock_all();
-errout:
return err;
}
-EXPORT_SYMBOL(__genl_register_family);
+EXPORT_SYMBOL(genl_register_family);
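With GENL_ID_GENERATE gone, a caller fills in ops and module up front and lets the idr assign the id. A hypothetical minimal family under the new API; the family name, command number, and handler below are illustrative, and the sketch assumes this kernel's genetlink interface:

#include <linux/module.h>
#include <linux/cache.h>
#include <net/genetlink.h>

static int example_doit(struct sk_buff *skb, struct genl_info *info)
{
        return 0;       /* no-op command handler */
}

static const struct genl_ops example_ops[] = {
        { .cmd = 1, .doit = example_doit },
};

static struct genl_family example_family __ro_after_init = {
        .name           = "example",
        .version        = 1,
        .maxattr        = 0,
        .module         = THIS_MODULE,
        .ops            = example_ops,
        .n_ops          = ARRAY_SIZE(example_ops),
};

static int __init example_init(void)
{
        /* the id is assigned by the idr from GENL_START_ALLOC upwards */
        return genl_register_family(&example_family);
}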
/**
* genl_unregister_family - unregister generic netlink family
@@ -434,33 +398,29 @@ EXPORT_SYMBOL(__genl_register_family);
*
* Returns 0 on success or a negative error code.
*/
-int genl_unregister_family(struct genl_family *family)
+int genl_unregister_family(const struct genl_family *family)
{
- struct genl_family *rc;
-
genl_lock_all();
- list_for_each_entry(rc, genl_family_chain(family->id), family_list) {
- if (family->id != rc->id || strcmp(rc->name, family->name))
- continue;
+ if (!genl_family_find_byid(family->id)) {
+ genl_unlock_all();
+ return -ENOENT;
+ }
- genl_unregister_mc_groups(family);
+ genl_unregister_mc_groups(family);
- list_del(&rc->family_list);
- family->n_ops = 0;
- up_write(&cb_lock);
- wait_event(genl_sk_destructing_waitq,
- atomic_read(&genl_sk_destructing_cnt) == 0);
- genl_unlock();
+ idr_remove(&genl_fam_idr, family->id);
- kfree(family->attrbuf);
- genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
- return 0;
- }
+ up_write(&cb_lock);
+ wait_event(genl_sk_destructing_waitq,
+ atomic_read(&genl_sk_destructing_cnt) == 0);
+ genl_unlock();
- genl_unlock_all();
+ kfree(family->attrbuf);
+
+ genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
- return -ENOENT;
+ return 0;
}
EXPORT_SYMBOL(genl_unregister_family);
@@ -476,7 +436,7 @@ EXPORT_SYMBOL(genl_unregister_family);
* Returns pointer to user specific header
*/
void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
- struct genl_family *family, int flags, u8 cmd)
+ const struct genl_family *family, int flags, u8 cmd)
{
struct nlmsghdr *nlh;
struct genlmsghdr *hdr;
@@ -535,7 +495,7 @@ static int genl_lock_done(struct netlink_callback *cb)
return rc;
}
-static int genl_family_rcv_msg(struct genl_family *family,
+static int genl_family_rcv_msg(const struct genl_family *family,
struct sk_buff *skb,
struct nlmsghdr *nlh)
{
@@ -647,7 +607,7 @@ out:
static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- struct genl_family *family;
+ const struct genl_family *family;
int err;
family = genl_family_find_byid(nlh->nlmsg_type);
@@ -676,15 +636,9 @@ static void genl_rcv(struct sk_buff *skb)
* Controller
**************************************************************************/
-static struct genl_family genl_ctrl = {
- .id = GENL_ID_CTRL,
- .name = "nlctrl",
- .version = 0x2,
- .maxattr = CTRL_ATTR_MAX,
- .netnsok = true,
-};
+static struct genl_family genl_ctrl;
-static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
+static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq,
u32 flags, struct sk_buff *skb, u8 cmd)
{
void *hdr;
@@ -771,7 +725,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int ctrl_fill_mcgrp_info(struct genl_family *family,
+static int ctrl_fill_mcgrp_info(const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id, u32 portid, u32 seq, u32 flags,
struct sk_buff *skb, u8 cmd)
@@ -814,37 +768,30 @@ nla_put_failure:
static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
{
-
- int i, n = 0;
+ int n = 0;
struct genl_family *rt;
struct net *net = sock_net(skb->sk);
- int chains_to_skip = cb->args[0];
- int fams_to_skip = cb->args[1];
-
- for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) {
- n = 0;
- list_for_each_entry(rt, genl_family_chain(i), family_list) {
- if (!rt->netnsok && !net_eq(net, &init_net))
- continue;
- if (++n < fams_to_skip)
- continue;
- if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- skb, CTRL_CMD_NEWFAMILY) < 0)
- goto errout;
- }
+ int fams_to_skip = cb->args[0];
+ unsigned int id;
- fams_to_skip = 0;
- }
+ idr_for_each_entry(&genl_fam_idr, rt, id) {
+ if (!rt->netnsok && !net_eq(net, &init_net))
+ continue;
+
+ if (n++ < fams_to_skip)
+ continue;
-errout:
- cb->args[0] = i;
- cb->args[1] = n;
+ if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ skb, CTRL_CMD_NEWFAMILY) < 0)
+ break;
+ }
+ cb->args[0] = n;
return skb->len;
}
-static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
+static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family,
u32 portid, int seq, u8 cmd)
{
struct sk_buff *skb;
@@ -864,7 +811,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
}
static struct sk_buff *
-ctrl_build_mcgrp_msg(struct genl_family *family,
+ctrl_build_mcgrp_msg(const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id, u32 portid, int seq, u8 cmd)
{
@@ -894,7 +841,7 @@ static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = {
static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
{
struct sk_buff *msg;
- struct genl_family *res = NULL;
+ const struct genl_family *res = NULL;
int err = -EINVAL;
if (info->attrs[CTRL_ATTR_FAMILY_ID]) {
@@ -938,7 +885,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
return genlmsg_reply(msg, info);
}
-static int genl_ctrl_event(int event, struct genl_family *family,
+static int genl_ctrl_event(int event, const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id)
{
@@ -992,27 +939,39 @@ static const struct genl_multicast_group genl_ctrl_groups[] = {
{ .name = "notify", },
};
+static struct genl_family genl_ctrl __ro_after_init = {
+ .module = THIS_MODULE,
+ .ops = genl_ctrl_ops,
+ .n_ops = ARRAY_SIZE(genl_ctrl_ops),
+ .mcgrps = genl_ctrl_groups,
+ .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups),
+ .id = GENL_ID_CTRL,
+ .name = "nlctrl",
+ .version = 0x2,
+ .maxattr = CTRL_ATTR_MAX,
+ .netnsok = true,
+};
+
static int genl_bind(struct net *net, int group)
{
- int i, err = -ENOENT;
+ struct genl_family *f;
+ int err = -ENOENT;
+ unsigned int id;
down_read(&cb_lock);
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
- struct genl_family *f;
-
- list_for_each_entry(f, genl_family_chain(i), family_list) {
- if (group >= f->mcgrp_offset &&
- group < f->mcgrp_offset + f->n_mcgrps) {
- int fam_grp = group - f->mcgrp_offset;
-
- if (!f->netnsok && net != &init_net)
- err = -ENOENT;
- else if (f->mcast_bind)
- err = f->mcast_bind(net, fam_grp);
- else
- err = 0;
- break;
- }
+
+ idr_for_each_entry(&genl_fam_idr, f, id) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ int fam_grp = group - f->mcgrp_offset;
+
+ if (!f->netnsok && net != &init_net)
+ err = -ENOENT;
+ else if (f->mcast_bind)
+ err = f->mcast_bind(net, fam_grp);
+ else
+ err = 0;
+ break;
}
}
up_read(&cb_lock);
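genl_bind() and genl_unbind() still resolve a global multicast group number to a family-relative index through mcgrp_offset; only the family iteration changed. The offset arithmetic, pulled out as an illustrative helper:

/* Illustrative only: maps a global mcast group to a family-local
 * index, or -1 if the group belongs to another family.
 */
static int example_family_group(unsigned int group,
                                unsigned int mcgrp_offset,
                                unsigned int n_mcgrps)
{
        if (group < mcgrp_offset || group >= mcgrp_offset + n_mcgrps)
                return -1;
        return group - mcgrp_offset;
}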
@@ -1022,21 +981,19 @@ static int genl_bind(struct net *net, int group)
static void genl_unbind(struct net *net, int group)
{
- int i;
+ struct genl_family *f;
+ unsigned int id;
down_read(&cb_lock);
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
- struct genl_family *f;
- list_for_each_entry(f, genl_family_chain(i), family_list) {
- if (group >= f->mcgrp_offset &&
- group < f->mcgrp_offset + f->n_mcgrps) {
- int fam_grp = group - f->mcgrp_offset;
+ idr_for_each_entry(&genl_fam_idr, f, id) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ int fam_grp = group - f->mcgrp_offset;
- if (f->mcast_unbind)
- f->mcast_unbind(net, fam_grp);
- break;
- }
+ if (f->mcast_unbind)
+ f->mcast_unbind(net, fam_grp);
+ break;
}
}
up_read(&cb_lock);
@@ -1076,13 +1033,9 @@ static struct pernet_operations genl_pernet_ops = {
static int __init genl_init(void)
{
- int i, err;
-
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
- INIT_LIST_HEAD(&family_ht[i]);
+ int err;
- err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops,
- genl_ctrl_groups);
+ err = genl_register_family(&genl_ctrl);
if (err < 0)
goto problem;
@@ -1098,6 +1051,25 @@ problem:
subsys_initcall(genl_init);
+/**
+ * genl_family_attrbuf - return family's attrbuf
+ * @family: the family
+ *
+ * Return the family's attrbuf, after checking that access
+ * to it is actually legitimate at this point.
+ *
+ * You cannot use this function with a family that has parallel_ops
+ * and you can only use it within (pre/post) doit/dumpit callbacks.
+ */
+struct nlattr **genl_family_attrbuf(const struct genl_family *family)
+{
+ if (!WARN_ON(family->parallel_ops))
+ lockdep_assert_held(&genl_mutex);
+
+ return family->attrbuf;
+}
+EXPORT_SYMBOL(genl_family_attrbuf);
+
static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
gfp_t flags)
{
@@ -1127,8 +1099,9 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
return err;
}
-int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
- u32 portid, unsigned int group, gfp_t flags)
+int genlmsg_multicast_allns(const struct genl_family *family,
+ struct sk_buff *skb, u32 portid,
+ unsigned int group, gfp_t flags)
{
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return -EINVAL;
@@ -1137,7 +1110,7 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
}
EXPORT_SYMBOL(genlmsg_multicast_allns);
-void genl_notify(struct genl_family *family, struct sk_buff *skb,
+void genl_notify(const struct genl_family *family, struct sk_buff *skb,
struct genl_info *info, u32 group, gfp_t flags)
{
struct net *net = genl_info_net(info);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index ea023b35f1c2..03f3d5c7beb8 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -38,14 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = {
{ .name = NFC_GENL_MCAST_EVENT_NAME, },
};
-static struct genl_family nfc_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NFC_GENL_NAME,
- .version = NFC_GENL_VERSION,
- .maxattr = NFC_ATTR_MAX,
-};
-
+static struct genl_family nfc_genl_family;
static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
@@ -120,21 +113,20 @@ nla_put_failure:
static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
{
+ struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family);
struct nfc_dev *dev;
int rc;
u32 idx;
rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize,
- nfc_genl_family.attrbuf,
- nfc_genl_family.maxattr,
- nfc_genl_policy);
+ attrbuf, nfc_genl_family.maxattr, nfc_genl_policy);
if (rc < 0)
return ERR_PTR(rc);
- if (!nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX])
+ if (!attrbuf[NFC_ATTR_DEVICE_INDEX])
return ERR_PTR(-EINVAL);
- idx = nla_get_u32(nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]);
+ idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]);
dev = nfc_get_device(idx);
if (!dev)
@@ -1754,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = {
},
};
+static struct genl_family nfc_genl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NFC_GENL_NAME,
+ .version = NFC_GENL_VERSION,
+ .maxattr = NFC_ATTR_MAX,
+ .module = THIS_MODULE,
+ .ops = nfc_genl_ops,
+ .n_ops = ARRAY_SIZE(nfc_genl_ops),
+ .mcgrps = nfc_genl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps),
+};
+
struct urelease_work {
struct work_struct w;
@@ -1839,9 +1843,7 @@ int __init nfc_genl_init(void)
{
int rc;
- rc = genl_register_family_with_ops_groups(&nfc_genl_family,
- nfc_genl_ops,
- nfc_genl_mcgrps);
+ rc = genl_register_family(&nfc_genl_family);
if (rc)
return rc;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4e03f64709bc..514f7bcf7c63 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -62,9 +62,11 @@ struct ovs_frag_data {
struct vport *vport;
struct ovs_skb_cb cb;
__be16 inner_protocol;
- __u16 vlan_tci;
+ u16 network_offset; /* valid only for MPLS */
+ u16 vlan_tci;
__be16 vlan_proto;
unsigned int l2_len;
+ u8 mac_proto;
u8 l2_data[MAX_L2_LEN];
};
@@ -136,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
static void invalidate_flow_key(struct sw_flow_key *key)
{
- key->eth.type = htons(0);
+ key->mac_proto |= SW_FLOW_KEY_INVALID;
}
static bool is_flow_key_valid(const struct sw_flow_key *key)
{
- return !!key->eth.type;
+ return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}
static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
@@ -185,7 +187,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
- update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
+ if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
+ update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
skb->protocol = mpls->mpls_ethertype;
invalidate_flow_key(key);
@@ -195,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
const __be16 ethertype)
{
- struct ethhdr *hdr;
int err;
err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -211,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb->mac_len);
- /* mpls_hdr() is used to locate the ethertype field correctly in the
- * presence of VLAN tags.
- */
- hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
- update_ethertype(skb, hdr, ethertype);
+ if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
+ struct ethhdr *hdr;
+
+ /* mpls_hdr() is used to locate the ethertype field correctly in the
+ * presence of VLAN tags.
+ */
+ hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
+ update_ethertype(skb, hdr, ethertype);
+ }
if (eth_p_mpls(skb->protocol))
skb->protocol = ethertype;
@@ -311,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
+/* pop_eth does not support VLAN packets as this action is never called
+ * for them.
+ */
+static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ skb_pull_rcsum(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
+
+ /* safe right before invalidate_flow_key */
+ key->mac_proto = MAC_PROTO_NONE;
+ invalidate_flow_key(key);
+ return 0;
+}
+
+static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_action_push_eth *ethh)
+{
+ struct ethhdr *hdr;
+
+ /* Add the new Ethernet header */
+ if (skb_cow_head(skb, ETH_HLEN) < 0)
+ return -ENOMEM;
+
+ skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
+
+ hdr = eth_hdr(skb);
+ ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
+ ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
+ hdr->h_proto = skb->protocol;
+
+ skb_postpush_rcsum(skb, hdr, ETH_HLEN);
+
+ /* safe right before invalidate_flow_key */
+ key->mac_proto = MAC_PROTO_ETHERNET;
+ invalidate_flow_key(key);
+ return 0;
+}
+
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
__be32 addr, __be32 new_addr)
{
@@ -666,7 +713,13 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
skb_postpush_rcsum(skb, skb->data, data->l2_len);
skb_reset_mac_header(skb);
- ovs_vport_send(vport, skb);
+ if (eth_p_mpls(skb->protocol)) {
+ skb->inner_network_header = skb->network_header;
+ skb_set_network_header(skb, data->network_offset);
+ skb_reset_mac_len(skb);
+ }
+
+ ovs_vport_send(vport, skb, data->mac_proto);
return 0;
}
@@ -684,7 +737,8 @@ static struct dst_ops ovs_dst_ops = {
/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
* ovs_vport_output(), which is called once per fragmented packet.
*/
-static void prepare_frag(struct vport *vport, struct sk_buff *skb)
+static void prepare_frag(struct vport *vport, struct sk_buff *skb,
+ u16 orig_network_offset, u8 mac_proto)
{
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
@@ -694,8 +748,10 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
data->vport = vport;
data->cb = *OVS_CB(skb);
data->inner_protocol = skb->inner_protocol;
+ data->network_offset = orig_network_offset;
data->vlan_tci = skb->vlan_tci;
data->vlan_proto = skb->vlan_proto;
+ data->mac_proto = mac_proto;
data->l2_len = hlen;
memcpy(&data->l2_data, skb->data, hlen);
@@ -704,18 +760,27 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
}
static void ovs_fragment(struct net *net, struct vport *vport,
- struct sk_buff *skb, u16 mru, __be16 ethertype)
+ struct sk_buff *skb, u16 mru,
+ struct sw_flow_key *key)
{
+ u16 orig_network_offset = 0;
+
+ if (eth_p_mpls(skb->protocol)) {
+ orig_network_offset = skb_network_offset(skb);
+ skb->network_header = skb->inner_network_header;
+ }
+
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
goto err;
}
- if (ethertype == htons(ETH_P_IP)) {
+ if (key->eth.type == htons(ETH_P_IP)) {
struct dst_entry ovs_dst;
unsigned long orig_dst;
- prepare_frag(vport, skb);
+ prepare_frag(vport, skb, orig_network_offset,
+ ovs_key_mac_proto(key));
dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_dst.dev = vport->dev;
@@ -726,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
- } else if (ethertype == htons(ETH_P_IPV6)) {
+ } else if (key->eth.type == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
unsigned long orig_dst;
struct rt6_info ovs_rt;
@@ -735,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport,
goto err;
}
- prepare_frag(vport, skb);
+ prepare_frag(vport, skb, orig_network_offset,
+ ovs_key_mac_proto(key));
memset(&ovs_rt, 0, sizeof(ovs_rt));
dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
@@ -749,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
refdst_drop(orig_dst);
} else {
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
- ovs_vport_name(vport), ntohs(ethertype), mru,
+ ovs_vport_name(vport), ntohs(key->eth.type), mru,
vport->dev->mtu);
goto err;
}
@@ -769,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
u32 cutlen = OVS_CB(skb)->cutlen;
if (unlikely(cutlen > 0)) {
- if (skb->len - cutlen > ETH_HLEN)
+ if (skb->len - cutlen > ovs_mac_header_len(key))
pskb_trim(skb, skb->len - cutlen);
else
- pskb_trim(skb, ETH_HLEN);
+ pskb_trim(skb, ovs_mac_header_len(key));
}
- if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
- ovs_vport_send(vport, skb);
+ if (likely(!mru ||
+ (skb->len <= mru + vport->dev->hard_header_len))) {
+ ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
} else if (mru <= vport->dev->mtu) {
struct net *net = read_pnet(&dp->net);
- __be16 ethertype = key->eth.type;
-
- if (!is_flow_key_valid(key)) {
- if (eth_p_mpls(skb->protocol))
- ethertype = skb->inner_protocol;
- else
- ethertype = vlan_get_protocol(skb);
- }
- ovs_fragment(net, vport, skb, mru, ethertype);
+ ovs_fragment(net, vport, skb, mru, key);
} else {
kfree_skb(skb);
}
@@ -1182,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (err)
return err == -EINPROGRESS ? 0 : err;
break;
+
+ case OVS_ACTION_ATTR_PUSH_ETH:
+ err = push_eth(skb, key, nla_data(a));
+ break;
+
+ case OVS_ACTION_ATTR_POP_ETH:
+ err = pop_eth(skb, key);
+ break;
}
if (unlikely(err)) {
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index fecefa2dc94e..54253ea5976e 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -514,7 +514,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
int hooknum, nh_off, err = NF_ACCEPT;
nh_off = skb_network_offset(skb);
- skb_pull(skb, nh_off);
+ skb_pull_rcsum(skb, nh_off);
/* See HOOK2MANIP(). */
if (maniptype == NF_NAT_MANIP_SRC)
@@ -579,6 +579,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
err = nf_nat_packet(ct, ctinfo, hooknum, skb);
push:
skb_push(skb, nh_off);
+ skb_postpush_rcsum(skb, skb->data, nh_off);
return err;
}
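These conntrack hunks pair skb_pull_rcsum() with skb_postpush_rcsum() so that skb->csum stays correct on CHECKSUM_COMPLETE packets while the L3 header is temporarily exposed. A condensed sketch of the invariant, as an illustrative helper rather than code from the patch:

#include <linux/skbuff.h>

/* Temporarily expose the L3 header, do some work on it, then
 * restore the frame, keeping skb->csum consistent for
 * CHECKSUM_COMPLETE packets.
 */
static void example_l3_round_trip(struct sk_buff *skb,
                                  void (*l3_work)(struct sk_buff *))
{
        int nh_off = skb_network_offset(skb);

        skb_pull_rcsum(skb, nh_off);    /* folds pulled bytes out of csum */
        l3_work(skb);
        skb_push(skb, nh_off);
        skb_postpush_rcsum(skb, skb->data, nh_off);     /* folds them back in */
}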
@@ -728,12 +729,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
skb->nfctinfo = IP_CT_NEW;
}
- /* Repeat if requested, see nf_iterate(). */
- do {
- err = nf_conntrack_in(net, info->family,
- NF_INET_PRE_ROUTING, skb);
- } while (err == NF_REPEAT);
-
+ err = nf_conntrack_in(net, info->family,
+ NF_INET_PRE_ROUTING, skb);
if (err != NF_ACCEPT)
return -ENOENT;
@@ -890,7 +887,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
/* The conntrack module expects to be working at L3. */
nh_ofs = skb_network_offset(skb);
- skb_pull(skb, nh_ofs);
+ skb_pull_rcsum(skb, nh_ofs);
if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
err = handle_fragments(net, key, info->zone.id, skb);
@@ -904,6 +901,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
err = ovs_ct_lookup(net, key, info, skb);
skb_push(skb, nh_ofs);
+ skb_postpush_rcsum(skb, skb->data, nh_ofs);
if (err)
kfree_skb(skb);
return err;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4d67ea856067..9c62b6325f7a 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -58,8 +58,7 @@
#include "vport-internal_dev.h"
#include "vport-netdev.h"
-int ovs_net_id __read_mostly;
-EXPORT_SYMBOL_GPL(ovs_net_id);
+unsigned int ovs_net_id __read_mostly;
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
@@ -131,7 +130,6 @@ int lockdep_ovsl_is_held(void)
else
return 1;
}
-EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif
static struct vport *new_vport(const struct vport_parms *);
@@ -562,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct sw_flow *flow;
struct sw_flow_actions *sf_acts;
struct datapath *dp;
- struct ethhdr *eth;
struct vport *input_vport;
u16 mru = 0;
int len;
@@ -583,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
- skb_reset_mac_header(packet);
- eth = eth_hdr(packet);
-
- /* Normally, setting the skb 'protocol' field would be handled by a
- * call to eth_type_trans(), but it assumes there's a sending
- * device, which we may not have. */
- if (eth_proto_is_802_3(eth->h_proto))
- packet->protocol = eth->h_proto;
- else
- packet->protocol = htons(ETH_P_802_2);
-
/* Set packet's mru */
if (a[OVS_PACKET_ATTR_MRU]) {
mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
@@ -672,8 +658,7 @@ static const struct genl_ops dp_packet_genl_ops[] = {
}
};
-static struct genl_family dp_packet_genl_family = {
- .id = GENL_ID_GENERATE,
+static struct genl_family dp_packet_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_PACKET_FAMILY,
.version = OVS_PACKET_VERSION,
@@ -682,6 +667,7 @@ static struct genl_family dp_packet_genl_family = {
.parallel_ops = true,
.ops = dp_packet_genl_ops,
.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+ .module = THIS_MODULE,
};
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
@@ -1437,8 +1423,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
},
};
-static struct genl_family dp_flow_genl_family = {
- .id = GENL_ID_GENERATE,
+static struct genl_family dp_flow_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_FLOW_FAMILY,
.version = OVS_FLOW_VERSION,
@@ -1449,6 +1434,7 @@ static struct genl_family dp_flow_genl_family = {
.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
.mcgrps = &ovs_dp_flow_multicast_group,
.n_mcgrps = 1,
+ .module = THIS_MODULE,
};
static size_t ovs_dp_cmd_msg_size(void)
@@ -1823,8 +1809,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
},
};
-static struct genl_family dp_datapath_genl_family = {
- .id = GENL_ID_GENERATE,
+static struct genl_family dp_datapath_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
@@ -1835,6 +1820,7 @@ static struct genl_family dp_datapath_genl_family = {
.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
.mcgrps = &ovs_dp_datapath_multicast_group,
.n_mcgrps = 1,
+ .module = THIS_MODULE,
};
/* Called with ovs_mutex or RCU read lock. */
@@ -2245,8 +2231,7 @@ static const struct genl_ops dp_vport_genl_ops[] = {
},
};
-struct genl_family dp_vport_genl_family = {
- .id = GENL_ID_GENERATE,
+struct genl_family dp_vport_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_VPORT_FAMILY,
.version = OVS_VPORT_VERSION,
@@ -2257,6 +2242,7 @@ struct genl_family dp_vport_genl_family = {
.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
.mcgrps = &ovs_dp_vport_multicast_group,
.n_mcgrps = 1,
+ .module = THIS_MODULE,
};
static struct genl_family * const dp_genl_families[] = {
@@ -2274,7 +2260,7 @@ static void dp_unregister_genl(int n_families)
genl_unregister_family(dp_genl_families[i]);
}
-static int dp_register_genl(void)
+static int __init dp_register_genl(void)
{
int err;
int i;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index ab85c1cae255..1c6e9377436d 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -144,7 +144,7 @@ struct ovs_net {
bool xt_label;
};
-extern int ovs_net_id;
+extern unsigned int ovs_net_id;
void ovs_lock(void);
void ovs_unlock(void);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 22087062bd10..2c0a00f7f1b7 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -312,7 +312,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
* Returns 0 if it encounters a non-vlan or incomplete packet.
* Returns 1 after successfully parsing vlan tag.
*/
-static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
+static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
+ bool untag_vlan)
{
struct vlan_head *vh = (struct vlan_head *)skb->data;
@@ -330,31 +331,47 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
key_vh->tpid = vh->tpid;
- __skb_pull(skb, sizeof(struct vlan_head));
+ if (unlikely(untag_vlan)) {
+ int offset = skb->data - skb_mac_header(skb);
+ u16 tci;
+ int err;
+
+ __skb_push(skb, offset);
+ err = __skb_vlan_pop(skb, &tci);
+ __skb_pull(skb, offset);
+ if (err)
+ return err;
+ __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
+ } else {
+ __skb_pull(skb, sizeof(struct vlan_head));
+ }
return 1;
}
-static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+static void clear_vlan(struct sw_flow_key *key)
{
- int res;
-
key->eth.vlan.tci = 0;
key->eth.vlan.tpid = 0;
key->eth.cvlan.tci = 0;
key->eth.cvlan.tpid = 0;
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ int res;
if (skb_vlan_tag_present(skb)) {
key->eth.vlan.tci = htons(skb->vlan_tci);
key->eth.vlan.tpid = skb->vlan_proto;
} else {
/* Parse outer vlan tag in the non-accelerated case. */
- res = parse_vlan_tag(skb, &key->eth.vlan);
+ res = parse_vlan_tag(skb, &key->eth.vlan, true);
if (res <= 0)
return res;
}
/* Parse inner vlan tag. */
- res = parse_vlan_tag(skb, &key->eth.cvlan);
+ res = parse_vlan_tag(skb, &key->eth.cvlan, false);
if (res <= 0)
return res;
@@ -483,17 +500,20 @@ invalid:
*
* Returns 0 if successful, otherwise a negative errno value.
*
- * Initializes @skb header pointers as follows:
+ * Initializes @skb header fields as follows:
*
- * - skb->mac_header: the Ethernet header.
+ * - skb->mac_header: the L2 header.
*
- * - skb->network_header: just past the Ethernet header, or just past the
- * VLAN header, to the first byte of the Ethernet payload.
+ * - skb->network_header: just past the L2 header, or just past the
+ * VLAN header, to the first byte of the L2 payload.
*
* - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
* on output, then just past the IP header, if one is present and
* of a correct length, otherwise the same as skb->network_header.
* For other key->eth.type values it is left untouched.
+ *
+ * - skb->protocol: the type of the data starting at skb->network_header.
+ * Equals to key->eth.type.
*/
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
@@ -505,28 +525,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
skb_reset_mac_header(skb);
- /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
- * header in the linear data area.
- */
- eth = eth_hdr(skb);
- ether_addr_copy(key->eth.src, eth->h_source);
- ether_addr_copy(key->eth.dst, eth->h_dest);
+ /* Link layer. */
+ clear_vlan(key);
+ if (key->mac_proto == MAC_PROTO_NONE) {
+ if (unlikely(eth_type_vlan(skb->protocol)))
+ return -EINVAL;
- __skb_pull(skb, 2 * ETH_ALEN);
- /* We are going to push all headers that we pull, so no need to
- * update skb->csum here.
- */
+ skb_reset_network_header(skb);
+ } else {
+ eth = eth_hdr(skb);
+ ether_addr_copy(key->eth.src, eth->h_source);
+ ether_addr_copy(key->eth.dst, eth->h_dest);
- if (unlikely(parse_vlan(skb, key)))
- return -ENOMEM;
+ __skb_pull(skb, 2 * ETH_ALEN);
+ /* We are going to push all headers that we pull, so no need to
+ * update skb->csum here.
+ */
- key->eth.type = parse_ethertype(skb);
- if (unlikely(key->eth.type == htons(0)))
- return -ENOMEM;
+ if (unlikely(parse_vlan(skb, key)))
+ return -ENOMEM;
+
+ skb->protocol = parse_ethertype(skb);
+ if (unlikely(skb->protocol == htons(0)))
+ return -ENOMEM;
- skb_reset_network_header(skb);
+ skb_reset_network_header(skb);
+ __skb_push(skb, skb->data - skb_mac_header(skb));
+ }
skb_reset_mac_len(skb);
- __skb_push(skb, skb->data - skb_mac_header(skb));
+ key->eth.type = skb->protocol;
/* Network layer. */
if (key->eth.type == htons(ETH_P_IP)) {
@@ -721,9 +748,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
return key_extract(skb, key);
}
+static int key_extract_mac_proto(struct sk_buff *skb)
+{
+ switch (skb->dev->type) {
+ case ARPHRD_ETHER:
+ return MAC_PROTO_ETHERNET;
+ case ARPHRD_NONE:
+ if (skb->protocol == htons(ETH_P_TEB))
+ return MAC_PROTO_ETHERNET;
+ return MAC_PROTO_NONE;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
+ int res;
+
/* Extract metadata from packet. */
if (tun_info) {
key->tun_proto = ip_tunnel_info_af(tun_info);
@@ -751,6 +794,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->phy.skb_mark = skb->mark;
ovs_ct_fill_key(skb, key);
key->ovs_flow_hash = 0;
+ res = key_extract_mac_proto(skb);
+ if (res < 0)
+ return res;
+ key->mac_proto = res;
key->recirc_id = 0;
return key_extract(skb, key);
@@ -767,5 +814,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
if (err)
return err;
+ /* key_extract assumes that skb->protocol is set up for
+ * layer 3 packets, which is the case for other callers,
+ * in particular packets received from the network stack.
+ * Here the correct value can be set from the metadata
+ * extracted above.
+ * For L2 packets the eth type in the key would be zero;
+ * skb->protocol is then set to the correct value later,
+ * during key extraction.
+ */
+
+ skb->protocol = key->eth.type;
return key_extract(skb, key);
}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index ae783f5c6695..f61cae7f9030 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,6 +37,12 @@
struct sk_buff;
+enum sw_flow_mac_proto {
+ MAC_PROTO_NONE = 0,
+ MAC_PROTO_ETHERNET,
+};
+#define SW_FLOW_KEY_INVALID 0x80
+
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
@@ -68,6 +74,7 @@ struct sw_flow_key {
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} __packed phy; /* Safe when right after 'tun_key'. */
+ u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */
u8 tun_proto; /* Protocol of encapsulating tunnel. */
u32 ovs_flow_hash; /* Datapath computed hash value. */
u32 recirc_id; /* Recirculation ID. */
@@ -206,6 +213,21 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;
+static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
+{
+ return key->mac_proto & ~SW_FLOW_KEY_INVALID;
+}
+
+static inline u16 __ovs_mac_header_len(u8 mac_proto)
+{
+ return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0;
+}
+
+static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
+{
+ return __ovs_mac_header_len(ovs_key_mac_proto(key));
+}
+
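The top bit of key->mac_proto doubles as the stale-key marker, so validity and protocol travel in one u8 and the helpers above simply mask the bit off. A standalone sketch of the bit packing, with EX_* names mirroring the real constants:

#include <linux/types.h>

#define EX_KEY_INVALID  0x80    /* mirrors SW_FLOW_KEY_INVALID */

static inline void ex_invalidate(u8 *mac_proto)
{
        *mac_proto |= EX_KEY_INVALID;   /* as in invalidate_flow_key() */
}

static inline bool ex_is_valid(u8 mac_proto)
{
        return !(mac_proto & EX_KEY_INVALID);
}

static inline u8 ex_proto(u8 mac_proto)
{
        return mac_proto & ~EX_KEY_INVALID;     /* strip the marker */
}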
static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
{
return sfid->ufid_len;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ae25ded82b3b..c87d359b9b37 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -123,7 +123,7 @@ static void update_range(struct sw_flow_match *match,
static bool match_validate(const struct sw_flow_match *match,
u64 key_attrs, u64 mask_attrs, bool log)
{
- u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
+ u64 key_expected = 0;
u64 mask_allowed = key_attrs; /* At most allow all key attributes */
/* The following mask attributes allowed only if they
@@ -969,10 +969,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
return 0;
}
+static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
+ u64 *attrs, const struct nlattr **a,
+ bool is_mask, bool log)
+{
+ __be16 eth_type;
+
+ eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+ if (is_mask) {
+ /* Always exact match EtherType. */
+ eth_type = htons(0xffff);
+ } else if (!eth_proto_is_802_3(eth_type)) {
+ OVS_NLERR(log, "EtherType %x is less than min %x",
+ ntohs(eth_type), ETH_P_802_3_MIN);
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ return 0;
+}
+
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
u64 *attrs, const struct nlattr **a,
bool is_mask, bool log)
{
+ u8 mac_proto = MAC_PROTO_ETHERNET;
+
if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
@@ -1059,6 +1082,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
sizeof(*cl), is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
}
+
+ /* For layer 3 packets the Ethernet type is provided
+ * and treated as metadata but no MAC addresses are provided.
+ */
+ if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
+ (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
+ mac_proto = MAC_PROTO_NONE;
+
+ /* Always exact match mac_proto */
+ SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
+
+ if (mac_proto == MAC_PROTO_NONE)
+ return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
+ log);
+
return 0;
}
@@ -1081,33 +1119,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
SW_FLOW_KEY_MEMCPY(match, eth.dst,
eth_key->eth_dst, ETH_ALEN, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
- }
- if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
- /* VLAN attribute is always parsed before getting here since it
- * may occur multiple times.
- */
- OVS_NLERR(log, "VLAN attribute unexpected.");
- return -EINVAL;
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
- __be16 eth_type;
-
- eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
- if (is_mask) {
- /* Always exact match EtherType. */
- eth_type = htons(0xffff);
- } else if (!eth_proto_is_802_3(eth_type)) {
- OVS_NLERR(log, "EtherType %x is less than min %x",
- ntohs(eth_type), ETH_P_802_3_MIN);
+ if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
+ /* VLAN attribute is always parsed before getting here since it
+ * may occur multiple times.
+ */
+ OVS_NLERR(log, "VLAN attribute unexpected.");
return -EINVAL;
}
- SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
- } else if (!is_mask) {
- SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+ if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+ err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
+ log);
+ if (err)
+ return err;
+ } else if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+ }
+ } else if (!match->key->eth.type) {
+ OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
+ return -EINVAL;
}
if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
@@ -1556,42 +1587,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (ovs_ct_put_key(output, skb))
goto nla_put_failure;
- nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
- if (!nla)
- goto nla_put_failure;
-
- eth_key = nla_data(nla);
- ether_addr_copy(eth_key->eth_src, output->eth.src);
- ether_addr_copy(eth_key->eth_dst, output->eth.dst);
-
- if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
- if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
+ if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+ if (!nla)
goto nla_put_failure;
- encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
- if (!swkey->eth.vlan.tci)
- goto unencap;
- if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
- if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+ eth_key = nla_data(nla);
+ ether_addr_copy(eth_key->eth_src, output->eth.src);
+ ether_addr_copy(eth_key->eth_dst, output->eth.dst);
+
+ if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
+ if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
goto nla_put_failure;
- in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
- if (!swkey->eth.cvlan.tci)
+ encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+ if (!swkey->eth.vlan.tci)
goto unencap;
+
+ if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
+ if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+ goto nla_put_failure;
+ in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+ if (!swkey->eth.cvlan.tci)
+ goto unencap;
+ }
}
- }
- if (swkey->eth.type == htons(ETH_P_802_2)) {
- /*
- * Ethertype 802.2 is represented in the netlink with omitted
- * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
- * 0xffff in the mask attribute. Ethertype can also
- * be wildcarded.
- */
- if (is_mask && output->eth.type)
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
- output->eth.type))
- goto nla_put_failure;
- goto unencap;
+ if (swkey->eth.type == htons(ETH_P_802_2)) {
+ /*
+ * Ethertype 802.2 is represented in the netlink with omitted
+ * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
+ * 0xffff in the mask attribute. Ethertype can also
+ * be wildcarded.
+ */
+ if (is_mask && output->eth.type)
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+ output->eth.type))
+ goto nla_put_failure;
+ goto unencap;
+ }
}
if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
@@ -2126,8 +2159,8 @@ static bool validate_masked(u8 *data, int len)
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
- struct sw_flow_actions **sfa,
- bool *skip_copy, __be16 eth_type, bool masked, bool log)
+ struct sw_flow_actions **sfa, bool *skip_copy,
+ u8 mac_proto, __be16 eth_type, bool masked, bool log)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
@@ -2157,7 +2190,11 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
+ break;
+
case OVS_KEY_ATTR_ETHERNET:
+ if (mac_proto != MAC_PROTO_ETHERNET)
+ return -EINVAL;
break;
case OVS_KEY_ATTR_TUNNEL:
@@ -2324,6 +2361,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
{
+ u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
@@ -2346,6 +2384,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
[OVS_ACTION_ATTR_CT] = (u32)-1,
[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
+ [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
+ [OVS_ACTION_ATTR_POP_ETH] = 0,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -2394,10 +2434,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
}
case OVS_ACTION_ATTR_POP_VLAN:
+ if (mac_proto != MAC_PROTO_ETHERNET)
+ return -EINVAL;
vlan_tci = htons(0);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
+ if (mac_proto != MAC_PROTO_ETHERNET)
+ return -EINVAL;
vlan = nla_data(a);
if (!eth_type_vlan(vlan->vlan_tpid))
return -EINVAL;
@@ -2447,14 +2491,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
- &skip_copy, eth_type, false, log);
+ &skip_copy, mac_proto, eth_type,
+ false, log);
if (err)
return err;
break;
case OVS_ACTION_ATTR_SET_MASKED:
err = validate_set(a, key, sfa,
- &skip_copy, eth_type, true, log);
+ &skip_copy, mac_proto, eth_type,
+ true, log);
if (err)
return err;
break;
@@ -2474,6 +2520,22 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_PUSH_ETH:
+ /* Disallow pushing an Ethernet header if one
+ * is already present */
+ if (mac_proto != MAC_PROTO_NONE)
+ return -EINVAL;
+ mac_proto = MAC_PROTO_ETHERNET;
+ break;
+
+ case OVS_ACTION_ATTR_POP_ETH:
+ if (mac_proto != MAC_PROTO_ETHERNET)
+ return -EINVAL;
+ if (vlan_tci & htons(VLAN_TAG_PRESENT))
+ return -EINVAL;
+ mac_proto = MAC_PROTO_NONE;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
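The validation walk threads a mac_proto state through the action list, so a push/pop sequence can be checked without touching a packet: push is only legal on a pure L3 packet and yields an Ethernet packet, pop is the inverse and additionally refuses VLAN-tagged frames. A standalone sketch of that state machine, with EX_* standing in for MAC_PROTO_*:

#include <linux/errno.h>
#include <linux/types.h>

enum ex_mac_proto { EX_NONE, EX_ETHERNET };

static int ex_validate_push_eth(enum ex_mac_proto *state)
{
        if (*state != EX_NONE)
                return -EINVAL;         /* header already present */
        *state = EX_ETHERNET;           /* packet gains an L2 header */
        return 0;
}

static int ex_validate_pop_eth(enum ex_mac_proto *state, bool vlan_tagged)
{
        if (*state != EX_ETHERNET || vlan_tagged)
                return -EINVAL;         /* nothing to pop, or VLAN present */
        *state = EX_NONE;               /* packet becomes pure L3 */
        return 0;
}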
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index e7da29021b38..d5d6caecd072 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops = {
.get_link = ethtool_op_get_link,
};
-static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
-{
- if (new_mtu < 68)
- return -EINVAL;
-
- netdev->mtu = new_mtu;
- return 0;
-}
-
static void internal_dev_destructor(struct net_device *dev)
{
struct vport *vport = ovs_internal_dev_get_vport(dev);
@@ -148,7 +139,6 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_change_mtu = internal_dev_change_mtu,
.ndo_get_stats64 = internal_get_stats,
.ndo_set_rx_headroom = internal_set_rx_headroom,
};
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4e3972344aa6..0389398fa4ab 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -57,8 +57,10 @@ static void netdev_port_receive(struct sk_buff *skb)
if (unlikely(!skb))
return;
- skb_push(skb, ETH_HLEN);
- skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+ if (skb->dev->type == ARPHRD_ETHER) {
+ skb_push(skb, ETH_HLEN);
+ skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+ }
ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
return;
error:
@@ -97,7 +99,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
}
if (vport->dev->flags & IFF_LOOPBACK ||
- vport->dev->type != ARPHRD_ETHER ||
+ (vport->dev->type != ARPHRD_ETHER &&
+ vport->dev->type != ARPHRD_NONE) ||
ovs_is_internal_dev(vport->dev)) {
err = -EINVAL;
goto error_put;
@@ -162,7 +165,6 @@ void ovs_netdev_detach_dev(struct vport *vport)
netdev_master_upper_dev_get(vport->dev));
dev_set_promiscuity(vport->dev, -1);
}
-EXPORT_SYMBOL_GPL(ovs_netdev_detach_dev);
static void netdev_destroy(struct vport *vport)
{
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 7387418ac514..b6c8524032a0 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -463,27 +463,11 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
ovs_dp_process_packet(skb, &key);
return 0;
}
-EXPORT_SYMBOL_GPL(ovs_vport_receive);
-static void free_vport_rcu(struct rcu_head *rcu)
+static unsigned int packet_length(const struct sk_buff *skb,
+ struct net_device *dev)
{
- struct vport *vport = container_of(rcu, struct vport, rcu);
-
- ovs_vport_free(vport);
-}
-
-void ovs_vport_deferred_free(struct vport *vport)
-{
- if (!vport)
- return;
-
- call_rcu(&vport->rcu, free_vport_rcu);
-}
-EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
-
-static unsigned int packet_length(const struct sk_buff *skb)
-{
- unsigned int length = skb->len - ETH_HLEN;
+ unsigned int length = skb->len - dev->hard_header_len;
if (!skb_vlan_tag_present(skb) &&
eth_type_vlan(skb->protocol))
@@ -497,14 +481,34 @@ static unsigned int packet_length(const struct sk_buff *skb)
return length;
}
-void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
{
int mtu = vport->dev->mtu;
- if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+ switch (vport->dev->type) {
+ case ARPHRD_NONE:
+ if (mac_proto == MAC_PROTO_ETHERNET) {
+ skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
+ skb->protocol = htons(ETH_P_TEB);
+ } else if (mac_proto != MAC_PROTO_NONE) {
+ WARN_ON_ONCE(1);
+ goto drop;
+ }
+ break;
+ case ARPHRD_ETHER:
+ if (mac_proto != MAC_PROTO_ETHERNET)
+ goto drop;
+ break;
+ default:
+ goto drop;
+ }
+
+ if (unlikely(packet_length(skb, vport->dev) > mtu &&
+ !skb_is_gso(skb))) {
net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
vport->dev->name,
- packet_length(skb), mtu);
+ packet_length(skb, vport->dev), mtu);
vport->dev->stats.tx_errors++;
goto drop;
}
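ovs_vport_send() now refuses device/packet combinations that cannot work; an Ethernet frame leaving an ARPHRD_NONE (pure L3 tunnel) port is re-labelled ETH_P_TEB so the tunnel carries the whole frame. An illustrative compatibility check, reduced from the switch above:

#include <linux/if_arp.h>
#include <linux/types.h>

enum ex_mac_proto { EX_NONE, EX_ETHERNET };

/* Returns true when a packet of @proto may leave a device of @type.
 * Illustrative; the real function also rewrites skb->protocol to
 * ETH_P_TEB for Ethernet-over-L3 and resets the header offsets.
 */
static bool ex_vport_compatible(unsigned short type, enum ex_mac_proto proto)
{
        switch (type) {
        case ARPHRD_NONE:
                return true;                    /* L3, or L2 encapsulated as TEB */
        case ARPHRD_ETHER:
                return proto == EX_ETHERNET;    /* Ethernet frames only */
        default:
                return false;
        }
}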
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index f01f28a567ad..cda66c26ad08 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -149,7 +149,6 @@ struct vport_ops {
struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
const struct vport_parms *);
void ovs_vport_free(struct vport *);
-void ovs_vport_deferred_free(struct vport *vport);
#define VPORT_ALIGN 8
@@ -198,6 +197,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops);
})
void ovs_vport_ops_unregister(struct vport_ops *ops);
-void ovs_vport_send(struct vport *vport, struct sk_buff *skb);
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto);
#endif /* vport.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index dd2332390c45..3d555c79a7b5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -73,7 +73,7 @@
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
@@ -1967,17 +1967,6 @@ static unsigned int run_filter(struct sk_buff *skb,
return res;
}
-static int __packet_rcv_vnet(const struct sk_buff *skb,
- struct virtio_net_hdr *vnet_hdr)
-{
- *vnet_hdr = (const struct virtio_net_hdr) { 0 };
-
- if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le()))
- BUG();
-
- return 0;
-}
-
static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
size_t *len)
{
@@ -1987,7 +1976,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
return -EINVAL;
*len -= sizeof(vnet_hdr);
- if (__packet_rcv_vnet(skb, &vnet_hdr))
+ if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true))
return -EINVAL;
return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
@@ -2246,8 +2235,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
spin_unlock(&sk->sk_receive_queue.lock);
if (po->has_vnet_hdr) {
- if (__packet_rcv_vnet(skb, h.raw + macoff -
- sizeof(struct virtio_net_hdr))) {
+ if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
+ sizeof(struct virtio_net_hdr),
+ vio_le(), true)) {
spin_lock(&sk->sk_receive_queue.lock);
goto drop_n_account;
}
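Both directions now use the shared linux/virtio_net.h helpers: virtio_net_hdr_from_skb() on receive, as above, and virtio_net_hdr_to_skb() on send, replacing the removed packet_snd_vnet_gso() below. A sketch of the send-side call as it would look outside af_packet.c; note vio_le() is a file-local macro there, expanded here to the helper it wraps:

#include <linux/skbuff.h>
#include <linux/virtio_net.h>
#include <linux/virtio_byteorder.h>

static int ex_vnet_to_skb(struct sk_buff *skb, struct virtio_net_hdr *hdr)
{
        /* Validates the header and applies csum/GSO state to the skb. */
        return virtio_net_hdr_to_skb(skb, hdr,
                                     virtio_legacy_is_little_endian());
}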
@@ -2390,8 +2380,6 @@ static void tpacket_set_protocol(const struct net_device *dev,
static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
{
- unsigned short gso_type = 0;
-
if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
(__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
__virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
@@ -2403,69 +2391,22 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
return -EINVAL;
- if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
- switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
- case VIRTIO_NET_HDR_GSO_TCPV4:
- gso_type = SKB_GSO_TCPV4;
- break;
- case VIRTIO_NET_HDR_GSO_TCPV6:
- gso_type = SKB_GSO_TCPV6;
- break;
- case VIRTIO_NET_HDR_GSO_UDP:
- gso_type = SKB_GSO_UDP;
- break;
- default:
- return -EINVAL;
- }
-
- if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
- gso_type |= SKB_GSO_TCP_ECN;
-
- if (vnet_hdr->gso_size == 0)
- return -EINVAL;
- }
-
- vnet_hdr->gso_type = gso_type; /* changes type, temporary storage */
return 0;
}
static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
struct virtio_net_hdr *vnet_hdr)
{
- int n;
-
if (*len < sizeof(*vnet_hdr))
return -EINVAL;
*len -= sizeof(*vnet_hdr);
- n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter);
- if (n != sizeof(*vnet_hdr))
+ if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
return -EFAULT;
return __packet_snd_vnet_parse(vnet_hdr, *len);
}
-static int packet_snd_vnet_gso(struct sk_buff *skb,
- struct virtio_net_hdr *vnet_hdr)
-{
- if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start);
- u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset);
-
- if (!skb_partial_csum_set(skb, s, o))
- return -EINVAL;
- }
-
- skb_shinfo(skb)->gso_size =
- __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size);
- skb_shinfo(skb)->gso_type = vnet_hdr->gso_type;
-
- /* Header must be checked, and gso_segs computed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
- return 0;
-}
-
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, void *data, int tp_len,
__be16 proto, unsigned char *addr, int hlen, int copylen,
@@ -2725,7 +2666,8 @@ tpacket_error:
}
}
- if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
+ if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr,
+ vio_le())) {
tp_len = -EINVAL;
goto tpacket_error;
}
@@ -2916,7 +2858,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
packet_pick_tx_queue(dev, skb);
if (po->has_vnet_hdr) {
- err = packet_snd_vnet_gso(skb, &vnet_hdr);
+ err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
if (err)
goto out_free;
len += sizeof(vnet_hdr);
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index fa8237fdc57b..21c28b51be94 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -217,20 +217,10 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static int gprs_set_mtu(struct net_device *dev, int new_mtu)
-{
- if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11)))
- return -EINVAL;
-
- dev->mtu = new_mtu;
- return 0;
-}
-
static const struct net_device_ops gprs_netdev_ops = {
.ndo_open = gprs_open,
.ndo_stop = gprs_close,
.ndo_start_xmit = gprs_xmit,
- .ndo_change_mtu = gprs_set_mtu,
};
static void gprs_setup(struct net_device *dev)
@@ -239,6 +229,8 @@ static void gprs_setup(struct net_device *dev)
dev->type = ARPHRD_PHONET_PIPE;
dev->flags = IFF_POINTOPOINT | IFF_NOARP;
dev->mtu = GPRS_DEFAULT_MTU;
+ dev->min_mtu = 576;
+ dev->max_mtu = (PHONET_MAX_MTU - 11);
dev->hard_header_len = 0;
dev->addr_len = 0;
dev->tx_queue_len = 10;
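The driver-private range check can go because, as of this kernel, the core enforces dev->min_mtu and dev->max_mtu in dev_set_mtu() before ndo_change_mtu is ever called. A simplified sketch of that core-side check, condensed from net/core/dev.c for illustration:

#include <linux/netdevice.h>
#include <linux/errno.h>

static int ex_mtu_in_range(const struct net_device *dev, int new_mtu)
{
        if (new_mtu < (int)dev->min_mtu)
                return -EINVAL;
        if (dev->max_mtu > 0 && new_mtu > (int)dev->max_mtu)
                return -EINVAL;
        return 0;
}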
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 850a86cde0b3..8bad5624a27a 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1167,7 +1167,7 @@ disabled:
/* Wait until flow control allows TX */
done = atomic_read(&pn->tx_credits);
while (!done) {
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
if (!timeo) {
err = -EAGAIN;
@@ -1178,10 +1178,9 @@ disabled:
goto out;
}
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
- done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
- finish_wait(sk_sleep(sk), &wait);
+ add_wait_queue(sk_sleep(sk), &wait);
+ done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
if (sk->sk_state != TCP_ESTABLISHED)
goto disabled;
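The switch to DEFINE_WAIT_FUNC/woken_wake_function matches this release's sk_wait_event() signature change: the wait entry is passed in, closing a missed-wakeup window between the condition check and the sleep. The resulting idiom, as a sketch with an illustrative condition:

#include <net/sock.h>

/* Sleeps until *credits is non-zero or @timeo expires; assumes the
 * caller holds the socket lock, which sk_wait_event() releases and
 * retakes around the sleep.
 */
static int ex_wait_for_credits(struct sock *sk, atomic_t *credits,
                               long timeo)
{
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        int done;

        add_wait_queue(sk_sleep(sk), &wait);
        done = sk_wait_event(sk, &timeo, atomic_read(credits), &wait);
        remove_wait_queue(sk_sleep(sk), &wait);
        return done;
}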
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index a58680016472..2cb4c5dfad6f 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -44,7 +44,7 @@ struct phonet_net {
struct phonet_routes routes;
};
-static int phonet_net_id __read_mostly;
+static unsigned int phonet_net_id __read_mostly;
static struct phonet_net *phonet_pernet(struct net *net)
{
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c985ecbe9bd6..ae5ac175b2be 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -252,7 +252,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
const int pkt_len = 20;
struct qrtr_hdr *hdr;
struct sk_buff *skb;
- u32 *buf;
+ __le32 *buf;
skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
if (!skb)
@@ -269,7 +269,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
hdr->dst_node_id = cpu_to_le32(dst_node);
hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
- buf = (u32 *)skb_put(skb, pkt_len);
+ buf = (__le32 *)skb_put(skb, pkt_len);
memset(buf, 0, pkt_len);
buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
buf[1] = cpu_to_le32(src_node);
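The qrtr change is a sparse annotation fix: cpu_to_le32() produces a __le32, so the landing buffer must be typed __le32 * for endianness checking (make C=1) to stay quiet. A sketch of the clean pattern, with illustrative field names:

#include <linux/types.h>
#include <asm/byteorder.h>

static void ex_fill_le(__le32 *buf, u32 type, u32 src, u32 dst)
{
        buf[0] = cpu_to_le32(type);     /* __le32 = __le32: no warning */
        buf[1] = cpu_to_le32(src);
        buf[2] = cpu_to_le32(dst);
}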
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 6beaeb1138f3..2ac1e6194be3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -605,10 +605,14 @@ static void rds_exit(void)
}
module_exit(rds_exit);
+u32 rds_gen_num;
+
static int rds_init(void)
{
int ret;
+ net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));
+
ret = rds_bind_lock_init();
if (ret)
goto out;
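rds_gen_num is seeded with net_get_random_once(), which fills the value on first use and at most once system-wide, so every connection created afterwards sees the same generation number. A sketch of the idiom with an illustrative variable:

#include <linux/net.h>
#include <linux/types.h>

static u32 ex_gen_num;

static u32 ex_get_gen_num(void)
{
        /* Fills ex_gen_num with random bytes exactly once. */
        net_get_random_once(&ex_gen_num, sizeof(ex_gen_num));
        return ex_gen_num;
}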
diff --git a/net/rds/connection.c b/net/rds/connection.c
index f5058559bb08..fe9d31c0b22d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -269,6 +269,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
+ conn->c_my_gen_num = rds_gen_num;
+ conn->c_peer_gen_num = 0;
hlist_add_head_rcu(&conn->c_hash_node, head);
rds_cong_add_conn(conn);
rds_conn_count++;
@@ -681,6 +683,7 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
!test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
}
+EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
void rds_conn_connect_if_down(struct rds_connection *conn)
{
@@ -689,21 +692,6 @@ void rds_conn_connect_if_down(struct rds_connection *conn)
}
EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
-/*
- * An error occurred on the connection
- */
-void
-__rds_conn_error(struct rds_connection *conn, const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vprintk(fmt, ap);
- va_end(ap);
-
- rds_conn_drop(conn);
-}
-
void
__rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
{
diff --git a/net/rds/message.c b/net/rds/message.c
index 6cb91061556a..49bfb512d808 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -42,6 +42,7 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
[RDS_EXTHDR_NPATHS] = sizeof(u16),
+[RDS_EXTHDR_GEN_NUM] = sizeof(u32),
};
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 4c93badeabf2..ea961144084f 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -135,7 +135,7 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
/* Release any MRs associated with this socket */
spin_lock_irqsave(&rs->rs_rdma_lock, flags);
while ((node = rb_first(&rs->rs_rdma_keys))) {
- mr = container_of(node, struct rds_mr, r_rb_node);
+ mr = rb_entry(node, struct rds_mr, r_rb_node);
if (mr->r_trans == rs->rs_transport)
mr->r_invalidate = 0;
rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
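rb_entry() is a cosmetic replacement here; it is container_of() specialized for struct rb_node members, so the generated code is identical and only the intent becomes clearer:

    /* From include/linux/rbtree.h:
     * #define rb_entry(ptr, type, member) container_of(ptr, type, member)
     */
    mr = rb_entry(node, struct rds_mr, r_rb_node);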
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 345f09059e9f..d5f311767157 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -100,11 +100,14 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
trans->cm_connect_complete(conn, event);
break;
+ case RDMA_CM_EVENT_REJECTED:
+ rdsdebug("Connection rejected: %s\n",
+ rdma_reject_msg(cm_id, event->status));
+ /* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
if (conn)
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 67ba67c058b1..ebbf909b87ec 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -151,6 +151,9 @@ struct rds_connection {
struct rds_conn_path c_path[RDS_MPATH_WORKERS];
wait_queue_head_t c_hs_waitq; /* handshake waitq */
+
+ u32 c_my_gen_num;
+ u32 c_peer_gen_num;
};
static inline
@@ -243,7 +246,8 @@ struct rds_ext_header_rdma_dest {
/* Extension header announcing number of paths.
* Implicit length = 2 bytes.
*/
-#define RDS_EXTHDR_NPATHS 4
+#define RDS_EXTHDR_NPATHS 5
+#define RDS_EXTHDR_GEN_NUM 6
#define __RDS_EXTHDR_MAX 16 /* for now */
@@ -338,6 +342,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
#define RDS_MSG_RETRANSMITTED 5
#define RDS_MSG_MAPPED 6
#define RDS_MSG_PAGEVEC 7
+#define RDS_MSG_FLUSH 8
struct rds_message {
atomic_t m_refcount;
@@ -664,6 +669,7 @@ void rds_cong_exit(void);
struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
/* conn.c */
+extern u32 rds_gen_num;
int rds_conn_init(void);
void rds_conn_exit(void);
struct rds_connection *rds_conn_create(struct net *net,
@@ -683,10 +689,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
size_t item_len);
-__printf(2, 3)
-void __rds_conn_error(struct rds_connection *conn, const char *, ...);
-#define rds_conn_error(conn, fmt...) \
- __rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
__printf(2, 3)
void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index cbfabdf3ff48..9d0666e5fe35 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -120,6 +120,36 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
/* do nothing if no change in cong state */
}
+static void rds_conn_peer_gen_update(struct rds_connection *conn,
+ u32 peer_gen_num)
+{
+ int i;
+ struct rds_message *rm, *tmp;
+ unsigned long flags;
+
+ WARN_ON(conn->c_trans->t_type != RDS_TRANS_TCP);
+ if (peer_gen_num != 0) {
+ if (conn->c_peer_gen_num != 0 &&
+ peer_gen_num != conn->c_peer_gen_num) {
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ struct rds_conn_path *cp;
+
+ cp = &conn->c_path[i];
+ spin_lock_irqsave(&cp->cp_lock, flags);
+ cp->cp_next_tx_seq = 1;
+ cp->cp_next_rx_seq = 0;
+ list_for_each_entry_safe(rm, tmp,
+ &cp->cp_retrans,
+ m_conn_item) {
+ set_bit(RDS_MSG_FLUSH, &rm->m_flags);
+ }
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
+ }
+ }
+ conn->c_peer_gen_num = peer_gen_num;
+ }
+}
+
/*
* Process all extension headers that come with this message.
*/
@@ -163,7 +193,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
union {
struct rds_ext_header_version version;
u16 rds_npaths;
+ u32 rds_gen_num;
} buffer;
+ u32 new_peer_gen_num = 0;
while (1) {
len = sizeof(buffer);
@@ -176,6 +208,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
buffer.rds_npaths);
break;
+ case RDS_EXTHDR_GEN_NUM:
+ new_peer_gen_num = buffer.rds_gen_num;
+ break;
default:
pr_warn_ratelimited("ignoring unknown exthdr type "
"0x%x\n", type);
@@ -183,6 +218,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
}
/* if RDS_EXTHDR_NPATHS was not found, default to a single path */
conn->c_npaths = max_t(int, conn->c_npaths, 1);
+ rds_conn_peer_gen_update(conn, new_peer_gen_num);
}
/* rds_start_mprds() will synchronously start multiple paths when appropriate.
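Taken together, the RDS hunks add a reboot-detection handshake: each host advertises a boot-time random generation number (rds_gen_num) in its handshake probes, and a changed peer value means the peer rebooted, so queued retransmits are stale. A condensed sketch of the receive-side logic above (flush_retransmit_queues() is a hypothetical stand-in for the per-path loop that sets RDS_MSG_FLUSH):

    if (peer_gen_num) {
        if (conn->c_peer_gen_num &&
            conn->c_peer_gen_num != peer_gen_num)
            flush_retransmit_queues(conn);  /* mark RDS_MSG_FLUSH,
                                             * reset tx/rx sequences */
        conn->c_peer_gen_num = peer_gen_num;
    }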
diff --git a/net/rds/send.c b/net/rds/send.c
index 896626b9a0ef..77c8c6e613ad 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -259,8 +259,9 @@ restart:
* connection.
* Therefore, we never retransmit messages with RDMA ops.
*/
- if (rm->rdma.op_active &&
- test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
+ if (test_bit(RDS_MSG_FLUSH, &rm->m_flags) ||
+ (rm->rdma.op_active &&
+ test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))) {
spin_lock_irqsave(&cp->cp_lock, flags);
if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
list_move(&rm->m_conn_item, &to_be_dropped);
@@ -1209,6 +1210,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_NPATHS, &npaths,
sizeof(npaths));
+ rds_message_add_extension(&rm->m_inc.i_hdr,
+ RDS_EXTHDR_GEN_NUM,
+ &cp->cp_conn->c_my_gen_num,
+ sizeof(u32));
}
spin_unlock_irqrestore(&cp->cp_lock, flags);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 20e2923dc827..57bb52361e0f 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -220,7 +220,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
write_unlock_bh(&sock->sk->sk_callback_lock);
}
-static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
+static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
@@ -229,6 +229,7 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
unsigned long flags;
struct sockaddr_in sin;
int sinlen;
+ struct socket *sock;
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
@@ -237,12 +238,17 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
- sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0);
- tsinfo.local_addr = sin.sin_addr.s_addr;
- tsinfo.local_port = sin.sin_port;
- sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1);
- tsinfo.peer_addr = sin.sin_addr.s_addr;
- tsinfo.peer_port = sin.sin_port;
+ sock = tc->t_sock;
+ if (sock) {
+ sock->ops->getname(sock, (struct sockaddr *)&sin,
+ &sinlen, 0);
+ tsinfo.local_addr = sin.sin_addr.s_addr;
+ tsinfo.local_port = sin.sin_port;
+ sock->ops->getname(sock, (struct sockaddr *)&sin,
+ &sinlen, 1);
+ tsinfo.peer_addr = sin.sin_addr.s_addr;
+ tsinfo.peer_port = sin.sin_port;
+ }
tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
tsinfo.data_rem = tc->t_tinc_data_rem;
@@ -360,7 +366,7 @@ struct rds_transport rds_tcp_transport = {
.t_mp_capable = 1,
};
-static int rds_tcp_netid;
+static unsigned int rds_tcp_netid;
/* per-network namespace private data for this module */
struct rds_tcp_net {
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 05f61c533ed3..d6839d96d539 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -60,7 +60,19 @@ void rds_tcp_state_change(struct sock *sk)
case TCP_SYN_RECV:
break;
case TCP_ESTABLISHED:
- rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
+ /* Force the peer to reconnect so that we have the
+ * TCP ports going from <smaller-ip>.<transient> to
+ * <larger-ip>.<RDS_TCP_PORT>. We avoid marking the
+ * RDS connection as RDS_CONN_UP until the reconnect,
+ * to avoid RDS datagram loss.
+ */
+ if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr &&
+ rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
+ RDS_CONN_ERROR)) {
+ rds_conn_path_drop(cp);
+ } else {
+ rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
+ }
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
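RDS/TCP wants a single canonical TCP connection per peer pair, always running from <smaller-ip>.<transient> to <larger-ip>.<RDS_TCP_PORT>. The hunk above therefore drops an actively opened connection on the larger-addressed host instead of completing it, forcing the peer to reconnect in the canonical direction. A compact restatement of the test (is_canonical_opener() is a hypothetical helper):

    /* Only the host with the smaller address may actively open. */
    static bool is_canonical_opener(struct rds_connection *conn)
    {
        return conn->c_laddr < conn->c_faddr;
    }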
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index e0b23fb5b8d5..f74bab3ecdca 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -83,27 +83,22 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
{
int i;
bool peer_is_smaller = (conn->c_faddr < conn->c_laddr);
- int npaths = conn->c_npaths;
-
- if (npaths <= 1) {
- struct rds_conn_path *cp = &conn->c_path[0];
- int ret;
-
- ret = rds_conn_path_transition(cp, RDS_CONN_DOWN,
- RDS_CONN_CONNECTING);
- if (!ret)
- rds_conn_path_transition(cp, RDS_CONN_ERROR,
- RDS_CONN_CONNECTING);
- return cp->cp_transport_data;
- }
+ int npaths = max_t(int, 1, conn->c_npaths);
- /* for mprds, paths with cp_index > 0 MUST be initiated by the peer
+ /* for mprds, all paths MUST be initiated by the peer
* with the smaller address.
*/
- if (!peer_is_smaller)
+ if (!peer_is_smaller) {
+ /* Make sure we initiate at least one path if this
+ * has not already been done; rds_start_mprds() will
+ * take care of additional paths, if necessary.
+ */
+ if (npaths == 1)
+ rds_conn_path_connect_if_down(&conn->c_path[0]);
return NULL;
+ }
- for (i = 1; i < npaths; i++) {
+ for (i = 0; i < npaths; i++) {
struct rds_conn_path *cp = &conn->c_path[i];
if (rds_conn_path_transition(cp, RDS_CONN_DOWN,
@@ -171,8 +166,8 @@ int rds_tcp_accept_one(struct socket *sock)
mutex_lock(&rs_tcp->t_conn_path_lock);
cp = rs_tcp->t_cpath;
conn_state = rds_conn_path_state(cp);
- if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP &&
- conn_state != RDS_CONN_ERROR)
+ WARN_ON(conn_state == RDS_CONN_UP);
+ if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR)
goto rst_nsk;
if (rs_tcp->t_sock) {
/* Need to resolve a duelling SYN between peers.
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 89d09b481f47..dcf4742083ea 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -100,6 +100,9 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags);
tc->t_last_expected_una = rm->m_ack_seq + 1;
+ if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
+ rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
+
rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
rm, rds_tcp_snd_nxt(tc),
(unsigned long long)rm->m_ack_seq);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index e42df11bf30a..e36e333a0aa0 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -171,8 +171,7 @@ void rds_connect_worker(struct work_struct *work)
RDS_CONN_DOWN))
rds_queue_reconnect(cp);
else
- rds_conn_path_error(cp,
- "RDS: connect failed\n");
+ rds_conn_path_error(cp, "connect failed\n");
}
}
}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 129d357d2722..9ad301c46b88 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -34,7 +34,7 @@
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/mm.h>
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 0fc76d845103..452bbb38d943 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -25,7 +25,7 @@
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/termios.h> /* For TIOCINQ/OUTQ */
#include <linux/mm.h>
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 2d59c9be40e1..5f63f6dcaabb 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -762,16 +762,17 @@ static const struct net_proto_family rxrpc_family_ops = {
static int __init af_rxrpc_init(void)
{
int ret = -1;
+ unsigned int tmp;
BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
- get_random_bytes(&rxrpc_client_conn_ids.cur,
- sizeof(rxrpc_client_conn_ids.cur));
- rxrpc_client_conn_ids.cur &= 0x3fffffff;
- if (rxrpc_client_conn_ids.cur == 0)
- rxrpc_client_conn_ids.cur = 1;
+ get_random_bytes(&tmp, sizeof(tmp));
+ tmp &= 0x3fffffff;
+ if (tmp == 0)
+ tmp = 1;
+ idr_set_cursor(&rxrpc_client_conn_ids, tmp);
ret = -ENOMEM;
rxrpc_call_jar = kmem_cache_create(
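The IDR rewrite hid the allocator's internals, so the random starting connection ID is now seeded through accessor functions instead of poking rxrpc_client_conn_ids.cur directly. A minimal sketch of the cursor pair, assuming the 4.10-era API used above:

    unsigned int seed;

    get_random_bytes(&seed, sizeof(seed));
    seed &= 0x3fffffff;                 /* stay in the client-CID range */
    idr_set_cursor(&rxrpc_client_conn_ids, seed ?: 1);

    /* later, e.g. for the reuse-distance heuristic: */
    unsigned int cur = idr_get_cursor(&rxrpc_client_conn_ids);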
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 60ef9605167e..6cbcdcc29853 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -263,12 +263,12 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
* times the maximum number of client conns away from the current
* allocation point to try and keep the IDs concentrated.
*/
- id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur);
+ id_cursor = idr_get_cursor(&rxrpc_client_conn_ids);
id = conn->proto.cid >> RXRPC_CIDSHIFT;
distance = id - id_cursor;
if (distance < 0)
distance = -distance;
- limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4;
+ limit = max(rxrpc_max_client_connections * 4, 1024U);
if (distance > limit)
goto mark_dont_reuse;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 44fb8d893c7d..1d87b5453ef7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
ASSERT(!irqs_disabled());
- skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
+ skb = skb_recv_udp(udp_sk, 0, 1, &ret);
if (!skb) {
if (ret == -EAGAIN)
return;
@@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
__UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
- /* The socket buffer we have is owned by UDP, with UDP's data all over
- * it, but we really want our own data there.
+ /* The UDP protocol already released all skb resources;
+ * we are free to add our own data there.
*/
- skb_orphan(skb);
sp = rxrpc_skb(skb);
/* dig out the RxRPC connection details */
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f893d180da1c..e10456ef6f7a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -41,8 +41,7 @@ static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
spin_lock_bh(&hinfo->lock);
hlist_del(&p->tcfa_head);
spin_unlock_bh(&hinfo->lock);
- gen_kill_estimator(&p->tcfa_bstats,
- &p->tcfa_rate_est);
+ gen_kill_estimator(&p->tcfa_rate_est);
/*
* gen_estimator est_timer() might access p->tcfa_lock
* or bstats, wait a RCU grace period before freeing p
@@ -237,8 +236,7 @@ EXPORT_SYMBOL(tcf_hash_check);
void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
{
if (est)
- gen_kill_estimator(&a->tcfa_bstats,
- &a->tcfa_rate_est);
+ gen_kill_estimator(&a->tcfa_rate_est);
call_rcu(&a->tcfa_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_cleanup);
@@ -670,8 +668,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
goto errout;
if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &p->tcfa_bstats,
- &p->tcfa_rate_est) < 0 ||
+ gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfa_qstats,
p->tcfa_qstats.qlen) < 0)
@@ -903,8 +900,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
goto err;
}
act->order = i;
- if (event == RTM_GETACTION)
- act->tcfa_refcnt++;
list_add_tail(&act->list, &actions);
}
@@ -917,7 +912,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
return ret;
}
err:
- tcf_action_destroy(&actions, 0);
+ if (event != RTM_GETACTION)
+ tcf_action_destroy(&actions, 0);
return ret;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1d3960033f61..520baa41cba3 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -28,12 +28,11 @@ struct tcf_bpf_cfg {
struct bpf_prog *filter;
struct sock_filter *bpf_ops;
const char *bpf_name;
- u32 bpf_fd;
u16 bpf_num_ops;
bool is_ebpf;
};
-static int bpf_net_id;
+static unsigned int bpf_net_id;
static struct tc_action_ops act_bpf_ops;
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
@@ -118,13 +117,18 @@ static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
struct sk_buff *skb)
{
- if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
- return -EMSGSIZE;
+ struct nlattr *nla;
if (prog->bpf_name &&
nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
return -EMSGSIZE;
+ nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag));
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));
+
return 0;
}
@@ -226,16 +230,13 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
return PTR_ERR(fp);
if (tb[TCA_ACT_BPF_NAME]) {
- name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
- nla_len(tb[TCA_ACT_BPF_NAME]),
- GFP_KERNEL);
+ name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL);
if (!name) {
bpf_prog_put(fp);
return -ENOMEM;
}
}
- cfg->bpf_fd = bpf_fd;
cfg->bpf_name = name;
cfg->filter = fp;
cfg->is_ebpf = true;
@@ -334,8 +335,6 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (cfg.bpf_num_ops)
prog->bpf_num_ops = cfg.bpf_num_ops;
- if (cfg.bpf_fd)
- prog->bpf_fd = cfg.bpf_fd;
prog->tcf_action = parm->action;
rcu_assign_pointer(prog->filter, cfg.filter);
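Dumping the program fd was never meaningful to other processes, so the dump now carries the program digest (prog->filter->tag) instead. The nla_reserve()/memcpy() pair above is equivalent to a single nla_put() of the fixed-size blob; a sketch of that shorter form, same semantics:

    if (nla_put(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag),
                prog->filter->tag))
        return -EMSGSIZE;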
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index eae07a2e774d..ab8062909962 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -30,7 +30,7 @@
#define CONNMARK_TAB_MASK 3
-static int connmark_net_id;
+static unsigned int connmark_net_id;
static struct tc_action_ops act_connmark_ops;
static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e0defcef376d..a0edd80a44db 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -42,7 +42,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
};
-static int csum_net_id;
+static unsigned int csum_net_id;
static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e0aa30f83c6c..e6c874a2b283 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -25,7 +25,7 @@
#define GACT_TAB_MASK 15
-static int gact_net_id;
+static unsigned int gact_net_id;
static struct tc_action_ops act_gact_ops;
#ifdef CONFIG_GACT_PROB
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 95c463cbb9a6..80b848d3f096 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -35,7 +35,7 @@
#define IFE_TAB_MASK 15
-static int ife_net_id;
+static unsigned int ife_net_id;
static int max_metacnt = IFE_META_MAX + 1;
static struct tc_action_ops act_ife_ops;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 378c1c976058..992ef8d624f1 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,10 +30,10 @@
#define IPT_TAB_MASK 15
-static int ipt_net_id;
+static unsigned int ipt_net_id;
static struct tc_action_ops act_ipt_ops;
-static int xt_net_id;
+static unsigned int xt_net_id;
static struct tc_action_ops act_xt_ops;
static int ipt_init_target(struct xt_entry_target *t, char *table,
@@ -213,6 +213,12 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
int ret = 0, result = 0;
struct tcf_ipt *ipt = to_ipt(a);
struct xt_action_param par;
+ struct nf_hook_state state = {
+ .net = dev_net(skb->dev),
+ .in = skb->dev,
+ .hook = ipt->tcfi_hook,
+ .pf = NFPROTO_IPV4,
+ };
if (skb_unclone(skb, GFP_ATOMIC))
return TC_ACT_UNSPEC;
@@ -226,13 +232,9 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
* worry later - danger - this API seems to have changed
* from earlier kernels
*/
- par.net = dev_net(skb->dev);
- par.in = skb->dev;
- par.out = NULL;
- par.hooknum = ipt->tcfi_hook;
+ par.state = &state;
par.target = ipt->tcfi_t->u.kernel.target;
par.targinfo = ipt->tcfi_t->data;
- par.family = NFPROTO_IPV4;
ret = par.target->target(skb, &par);
switch (ret) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6b07fba5770b..2d9fa6e0a1b4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfp.h>
+#include <linux/if_arp.h>
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
@@ -33,6 +34,25 @@
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
+static bool tcf_mirred_is_act_redirect(int action)
+{
+ return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
+}
+
+static u32 tcf_mirred_act_direction(int action)
+{
+ switch (action) {
+ case TCA_EGRESS_REDIR:
+ case TCA_EGRESS_MIRROR:
+ return AT_EGRESS;
+ case TCA_INGRESS_REDIR:
+ case TCA_INGRESS_MIRROR:
+ return AT_INGRESS;
+ default:
+ BUG();
+ }
+}
+
static void tcf_mirred_release(struct tc_action *a, int bind)
{
struct tcf_mirred *m = to_mirred(a);
@@ -51,7 +71,7 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
[TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
};
-static int mirred_net_id;
+static unsigned int mirred_net_id;
static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
@@ -60,11 +80,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
+ bool mac_header_xmit = false;
struct tc_mirred *parm;
struct tcf_mirred *m;
struct net_device *dev;
- int ret, ok_push = 0;
bool exists = false;
+ int ret;
if (nla == NULL)
return -EINVAL;
@@ -82,6 +103,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
switch (parm->eaction) {
case TCA_EGRESS_MIRROR:
case TCA_EGRESS_REDIR:
+ case TCA_INGRESS_REDIR:
+ case TCA_INGRESS_MIRROR:
break;
default:
if (exists)
@@ -95,19 +118,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
tcf_hash_release(*a, bind);
return -ENODEV;
}
- switch (dev->type) {
- case ARPHRD_TUNNEL:
- case ARPHRD_TUNNEL6:
- case ARPHRD_SIT:
- case ARPHRD_IPGRE:
- case ARPHRD_VOID:
- case ARPHRD_NONE:
- ok_push = 0;
- break;
- default:
- ok_push = 1;
- break;
- }
+ mac_header_xmit = dev_is_mac_header_xmit(dev);
} else {
dev = NULL;
}
@@ -136,7 +147,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
rcu_assign_pointer(m->tcfm_dev, dev);
- m->tcfm_ok_push = ok_push;
+ m->tcfm_mac_header_xmit = mac_header_xmit;
}
if (ret == ACT_P_CREATED) {
@@ -153,15 +164,20 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_mirred *m = to_mirred(a);
+ bool m_mac_header_xmit;
struct net_device *dev;
struct sk_buff *skb2;
- int retval, err;
+ int retval, err = 0;
+ int m_eaction;
+ int mac_len;
u32 at;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
rcu_read_lock();
+ m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
+ m_eaction = READ_ONCE(m->tcfm_eaction);
retval = READ_ONCE(m->tcf_action);
dev = rcu_dereference(m->tcfm_dev);
if (unlikely(!dev)) {
@@ -180,23 +196,36 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
if (!skb2)
goto out;
- if (!(at & AT_EGRESS)) {
- if (m->tcfm_ok_push)
+ /* If the action's target direction differs from the filter's
+ * direction, and the device expects a mac header on xmit, then
+ * mac push/pull is needed.
+ */
+ if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) {
+ if (at & AT_EGRESS) {
+ /* caught at egress, act ingress: pull mac */
+ mac_len = skb_network_header(skb) - skb_mac_header(skb);
+ skb_pull_rcsum(skb2, mac_len);
+ } else {
+ /* caught at ingress, act egress: push mac */
skb_push_rcsum(skb2, skb->mac_len);
+ }
}
/* mirror is always swallowed */
- if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ if (tcf_mirred_is_act_redirect(m_eaction))
skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
- err = dev_queue_xmit(skb2);
+ if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS)
+ err = dev_queue_xmit(skb2);
+ else
+ err = netif_receive_skb(skb2);
if (err) {
out:
qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
- if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
rcu_read_unlock();
@@ -286,6 +315,17 @@ static struct notifier_block mirred_device_notifier = {
.notifier_call = mirred_device_event,
};
+static int tcf_mirred_device(const struct tc_action *a, struct net *net,
+ struct net_device **mirred_dev)
+{
+ int ifindex = tcf_mirred_ifindex(a);
+
+ *mirred_dev = __dev_get_by_index(net, ifindex);
+ if (!*mirred_dev)
+ return -EINVAL;
+ return 0;
+}
+
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
.type = TCA_ACT_MIRRED,
@@ -298,6 +338,7 @@ static struct tc_action_ops act_mirred_ops = {
.walk = tcf_mirred_walker,
.lookup = tcf_mirred_search,
.size = sizeof(struct tcf_mirred),
+ .get_dev = tcf_mirred_device,
};
static __net_init int mirred_init_net(struct net *net)
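With ingress mirror/redirect supported, a packet may now be captured in one direction and delivered in the other, so the clone needs its MAC header pulled (egress capture, ingress delivery) or pushed back (ingress capture, egress delivery) whenever the two directions disagree and the target device transmits with a MAC header. A condensed sketch of that decision:

    bool want_egress = tcf_mirred_act_direction(m_eaction) & AT_EGRESS;
    bool from_egress = at & AT_EGRESS;

    if (want_egress != from_egress && m_mac_header_xmit) {
        if (from_egress)    /* egress -> ingress: strip the MAC header */
            skb_pull_rcsum(skb2,
                           skb_network_header(skb) - skb_mac_header(skb));
        else                /* ingress -> egress: restore the MAC header */
            skb_push_rcsum(skb2, skb->mac_len);
    }
    /* delivery follows the target direction */
    err = want_egress ? dev_queue_xmit(skb2) : netif_receive_skb(skb2);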
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 8e8b0cc30704..9b6aec665495 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -31,7 +31,7 @@
#define NAT_TAB_MASK 15
-static int nat_net_id;
+static unsigned int nat_net_id;
static struct tc_action_ops act_nat_ops;
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index cf9b2fe8eac6..b27c4daec88f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -25,7 +25,7 @@
#define PEDIT_TAB_MASK 15
-static int pedit_net_id;
+static unsigned int pedit_net_id;
static struct tc_action_ops act_pedit_ops;
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index d1bd248fe146..0ba91d1ce994 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -55,7 +55,7 @@ struct tc_police_compat {
/* Each policer is serialized by its individual spinlock */
-static int police_net_id;
+static unsigned int police_net_id;
static struct tc_action_ops act_police_ops;
static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
@@ -142,8 +142,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
goto failure_unlock;
} else if (tb[TCA_POLICE_AVRATE] &&
(ret == ACT_P_CREATED ||
- !gen_estimator_active(&police->tcf_bstats,
- &police->tcf_rate_est))) {
+ !gen_estimator_active(&police->tcf_rate_est))) {
err = -EINVAL;
goto failure_unlock;
}
@@ -216,13 +215,17 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
bstats_update(&police->tcf_bstats, skb);
tcf_lastuse_update(&police->tcf_tm);
- if (police->tcfp_ewma_rate &&
- police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
- police->tcf_qstats.overlimits++;
- if (police->tcf_action == TC_ACT_SHOT)
- police->tcf_qstats.drops++;
- spin_unlock(&police->tcf_lock);
- return police->tcf_action;
+ if (police->tcfp_ewma_rate) {
+ struct gnet_stats_rate_est64 sample;
+
+ if (!gen_estimator_read(&police->tcf_rate_est, &sample) ||
+ sample.bps >= police->tcfp_ewma_rate) {
+ police->tcf_qstats.overlimits++;
+ if (police->tcf_action == TC_ACT_SHOT)
+ police->tcf_qstats.drops++;
+ spin_unlock(&police->tcf_lock);
+ return police->tcf_action;
+ }
}
if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
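This hunk tracks the rate-estimator rewrite: the estimate is no longer an embedded gnet_stats_rate_est64 that can be read in place, but an RCU-managed struct net_rate_estimator sampled through a helper. A sketch of the consumer pattern, assuming that API:

    struct gnet_stats_rate_est64 sample;

    if (gen_estimator_read(&police->tcf_rate_est, &sample) &&
        sample.bps >= police->tcfp_ewma_rate)
        /* over the EWMA rate limit */;

    gen_kill_estimator(&police->tcf_rate_est);  /* on teardown */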
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 289af6f9bb3b..823a73ad0c60 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -26,7 +26,7 @@
#define SIMP_TAB_MASK 7
-static int simp_net_id;
+static unsigned int simp_net_id;
static struct tc_action_ops act_simp_ops;
#define SIMP_MAX_DATA 32
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index a133dcb82132..06ccae3c12ee 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -29,7 +29,7 @@
#define SKBEDIT_TAB_MASK 15
-static int skbedit_net_id;
+static unsigned int skbedit_net_id;
static struct tc_action_ops act_skbedit_ops;
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
@@ -46,8 +46,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
skb->dev->real_num_tx_queues > d->queue_mapping)
skb_set_queue_mapping(skb, d->queue_mapping);
- if (d->flags & SKBEDIT_F_MARK)
- skb->mark = d->mark;
+ if (d->flags & SKBEDIT_F_MARK) {
+ skb->mark &= ~d->mask;
+ skb->mark |= d->mark & d->mask;
+ }
if (d->flags & SKBEDIT_F_PTYPE)
skb->pkt_type = d->ptype;
@@ -61,6 +63,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
[TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
[TCA_SKBEDIT_MARK] = { .len = sizeof(u32) },
[TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
+ [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) },
};
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -71,7 +74,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tc_skbedit *parm;
struct tcf_skbedit *d;
- u32 flags = 0, *priority = NULL, *mark = NULL;
+ u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
u16 *queue_mapping = NULL, *ptype = NULL;
bool exists = false;
int ret = 0, err;
@@ -108,6 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
mark = nla_data(tb[TCA_SKBEDIT_MARK]);
}
+ if (tb[TCA_SKBEDIT_MASK] != NULL) {
+ flags |= SKBEDIT_F_MASK;
+ mask = nla_data(tb[TCA_SKBEDIT_MASK]);
+ }
+
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
exists = tcf_hash_check(tn, parm->index, a, bind);
@@ -145,6 +153,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
d->mark = *mark;
if (flags & SKBEDIT_F_PTYPE)
d->ptype = *ptype;
+ /* default behaviour is to use all the bits */
+ d->mask = 0xffffffff;
+ if (flags & SKBEDIT_F_MASK)
+ d->mask = *mask;
d->tcf_action = parm->action;
@@ -182,6 +194,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
if ((d->flags & SKBEDIT_F_PTYPE) &&
nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
goto nla_put_failure;
+ if ((d->flags & SKBEDIT_F_MASK) &&
+ nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
+ goto nla_put_failure;
tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
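The new SKBEDIT_F_MASK turns the mark rewrite into a read-modify-write, so several users can each own a bit range of skb->mark; the default mask of 0xffffffff preserves the old overwrite-everything behaviour. A worked example where this action owns only the top 16 bits:

    u32 old  = 0x0000beef;
    u32 mark = 0x12340000, mask = 0xffff0000;

    old &= ~mask;        /* 0x0000beef: foreign bits untouched */
    old |= mark & mask;  /* 0x1234beef: our bits written */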
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index e7d96381c908..3b7074e23024 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -22,7 +22,7 @@
#define SKBMOD_TAB_MASK 15
-static int skbmod_net_id;
+static unsigned int skbmod_net_id;
static struct tc_action_ops act_skbmod_ops;
#define MAX_EDIT_LEN ETH_HLEN
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index af47bdf2f483..e3a58e021198 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -16,14 +16,13 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
-#include <net/dst_metadata.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
#define TUNNEL_KEY_TAB_MASK 15
-static int tunnel_key_net_id;
+static unsigned int tunnel_key_net_id;
static struct tc_action_ops act_tunnel_key_ops;
static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
@@ -67,6 +66,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
[TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 },
+ [TCA_TUNNEL_KEY_ENC_DST_PORT] = { .type = NLA_U16 },
};
static int tunnel_key_init(struct net *net, struct nlattr *nla,
@@ -81,6 +81,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct tc_tunnel_key *parm;
struct tcf_tunnel_key *t;
bool exists = false;
+ __be16 dst_port = 0;
__be64 key_id;
int ret = 0;
int err;
@@ -111,6 +112,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
+ if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
+ dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
+
if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
__be32 saddr;
@@ -120,7 +124,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
- TUNNEL_KEY, key_id, 0);
+ dst_port, TUNNEL_KEY,
+ key_id, 0);
} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
struct in6_addr saddr;
@@ -129,8 +134,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
- metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0,
- TUNNEL_KEY, key_id, 0);
+ metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port,
+ 0, TUNNEL_KEY,
+ key_id, 0);
}
if (!metadata) {
@@ -258,7 +264,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
tunnel_key_dump_addresses(skb,
- &params->tcft_enc_metadata->u.tun_info))
+ &params->tcft_enc_metadata->u.tun_info) ||
+ nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst))
goto nla_put_failure;
}
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index b57fcbcefea1..19e0dba305ce 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -21,7 +21,7 @@
#define VLAN_TAB_MASK 15
-static int vlan_net_id;
+static unsigned int vlan_net_id;
static struct tc_action_ops act_vlan_ops;
static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b05d4a2155b0..1ecdf809b5fa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
unsigned long cl;
unsigned long fh;
int err;
- int tp_created = 0;
+ int tp_created;
if ((n->nlmsg_type != RTM_GETTFILTER) &&
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
+ tp_created = 0;
+
err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
if (err < 0)
return err;
@@ -682,6 +684,30 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
+int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
+ struct net_device **hw_dev)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+
+ if (tc_no_actions(exts))
+ return -EINVAL;
+
+ tcf_exts_to_list(exts, &actions);
+ list_for_each_entry(a, &actions, list) {
+ if (a->ops->get_dev) {
+ a->ops->get_dev(a, dev_net(dev), hw_dev);
+ break;
+ }
+ }
+ if (*hw_dev)
+ return 0;
+#endif
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(tcf_exts_get_dev);
+
static int __init tc_filter_init(void)
{
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
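tcf_exts_get_dev() lets a classifier ask its actions for an offload target: it walks the filter's action list for one implementing .get_dev (only mirred, at this point) and returns that device. The flower changes later in this series use it to offload a rule to the mirred target when the filter's own device cannot. A sketch of the intended caller pattern:

    struct net_device *hw_dev = NULL;

    if (!tc_can_offload(dev, tp) &&
        !tcf_exts_get_dev(dev, &f->exts, &hw_dev))
        dev = hw_dev;   /* program the redirect target instead */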
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 0a47ba5e6109..d9c97018317d 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -45,10 +45,7 @@ struct cls_bpf_prog {
u32 gen_flags;
struct tcf_exts exts;
u32 handle;
- union {
- u32 bpf_fd;
- u16 bpf_num_ops;
- };
+ u16 bpf_num_ops;
struct sock_filter *bpf_ops;
const char *bpf_name;
struct tcf_proto *tp;
@@ -244,7 +241,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
return 0;
}
-static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
+static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
{
tcf_exts_destroy(&prog->exts);
@@ -258,22 +255,22 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
kfree(prog);
}
-static void __cls_bpf_delete_prog(struct rcu_head *rcu)
+static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
{
- struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
-
- cls_bpf_delete_prog(prog->tp, prog);
+ __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
}
-static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
- struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
-
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
- call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+ call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+}
+static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
return 0;
}
@@ -285,12 +282,8 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
if (!force && !list_empty(&head->plist))
return false;
- list_for_each_entry_safe(prog, tmp, &head->plist, link) {
- cls_bpf_stop_offload(tp, prog);
- list_del_rcu(&prog->link);
- tcf_unbind_filter(tp, &prog->res);
- call_rcu(&prog->rcu, __cls_bpf_delete_prog);
- }
+ list_for_each_entry_safe(prog, tmp, &head->plist, link)
+ __cls_bpf_delete(tp, prog);
kfree_rcu(head, rcu);
return true;
@@ -365,9 +358,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
return PTR_ERR(fp);
if (tb[TCA_BPF_NAME]) {
- name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
- nla_len(tb[TCA_BPF_NAME]),
- GFP_KERNEL);
+ name = nla_memdup(tb[TCA_BPF_NAME], GFP_KERNEL);
if (!name) {
bpf_prog_put(fp);
return -ENOMEM;
@@ -375,7 +366,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
}
prog->bpf_ops = NULL;
- prog->bpf_fd = bpf_fd;
prog->bpf_name = name;
prog->filter = fp;
@@ -517,14 +507,14 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
ret = cls_bpf_offload(tp, prog, oldprog);
if (ret) {
- cls_bpf_delete_prog(tp, prog);
+ __cls_bpf_delete_prog(prog);
return ret;
}
if (oldprog) {
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
- call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
+ call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
} else {
list_add_rcu(&prog->link, &head->plist);
}
@@ -559,13 +549,18 @@ static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
struct sk_buff *skb)
{
- if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
- return -EMSGSIZE;
+ struct nlattr *nla;
if (prog->bpf_name &&
nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
return -EMSGSIZE;
+ nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag));
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));
+
return 0;
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 904442421db3..5752789acc13 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -39,11 +39,13 @@ struct fl_flow_key {
struct flow_dissector_key_ipv6_addrs ipv6;
};
struct flow_dissector_key_ports tp;
+ struct flow_dissector_key_icmp icmp;
struct flow_dissector_key_keyid enc_key_id;
union {
struct flow_dissector_key_ipv4_addrs enc_ipv4;
struct flow_dissector_key_ipv6_addrs enc_ipv6;
};
+ struct flow_dissector_key_ports enc_tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -81,6 +83,8 @@ struct cls_fl_filter {
u32 handle;
u32 flags;
struct rcu_head rcu;
+ struct tc_to_netdev tc;
+ struct net_device *hw_dev;
};
static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
@@ -149,16 +153,22 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
switch (ip_tunnel_info_af(info)) {
case AF_INET:
+ skb_key.enc_control.addr_type =
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS;
skb_key.enc_ipv4.src = key->u.ipv4.src;
skb_key.enc_ipv4.dst = key->u.ipv4.dst;
break;
case AF_INET6:
+ skb_key.enc_control.addr_type =
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS;
skb_key.enc_ipv6.src = key->u.ipv6.src;
skb_key.enc_ipv6.dst = key->u.ipv6.dst;
break;
}
skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
+ skb_key.enc_tp.src = key->tp_src;
+ skb_key.enc_tp.dst = key->tp_dst;
}
skb_key.indev_ifindex = skb->skb_iif;
@@ -202,75 +212,90 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
}
-static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
- struct tc_to_netdev tc;
+ struct net_device *dev = f->hw_dev;
+ struct tc_to_netdev *tc = &f->tc;
- if (!tc_should_offload(dev, tp, 0))
+ if (!tc_can_offload(dev, tp))
return;
offload.command = TC_CLSFLOWER_DESTROY;
- offload.cookie = cookie;
+ offload.cookie = (unsigned long)f;
- tc.type = TC_SETUP_CLSFLOWER;
- tc.cls_flower = &offload;
+ tc->type = TC_SETUP_CLSFLOWER;
+ tc->cls_flower = &offload;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
struct flow_dissector *dissector,
struct fl_flow_key *mask,
- struct fl_flow_key *key,
- struct tcf_exts *actions,
- unsigned long cookie, u32 flags)
+ struct cls_fl_filter *f)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
- struct tc_to_netdev tc;
+ struct tc_to_netdev *tc = &f->tc;
int err;
- if (!tc_should_offload(dev, tp, flags))
- return tc_skip_sw(flags) ? -EINVAL : 0;
+ if (!tc_can_offload(dev, tp)) {
+ if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
+ (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) {
+ f->hw_dev = dev;
+ return tc_skip_sw(f->flags) ? -EINVAL : 0;
+ }
+ dev = f->hw_dev;
+ tc->egress_dev = true;
+ } else {
+ f->hw_dev = dev;
+ }
offload.command = TC_CLSFLOWER_REPLACE;
- offload.cookie = cookie;
+ offload.cookie = (unsigned long)f;
offload.dissector = dissector;
offload.mask = mask;
- offload.key = key;
- offload.exts = actions;
+ offload.key = &f->mkey;
+ offload.exts = &f->exts;
- tc.type = TC_SETUP_CLSFLOWER;
- tc.cls_flower = &offload;
+ tc->type = TC_SETUP_CLSFLOWER;
+ tc->cls_flower = &offload;
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
- &tc);
+ tc);
- if (tc_skip_sw(flags))
+ if (tc_skip_sw(f->flags))
return err;
-
return 0;
}
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
- struct tc_to_netdev tc;
+ struct net_device *dev = f->hw_dev;
+ struct tc_to_netdev *tc = &f->tc;
- if (!tc_should_offload(dev, tp, 0))
+ if (!tc_can_offload(dev, tp))
return;
offload.command = TC_CLSFLOWER_STATS;
offload.cookie = (unsigned long)f;
offload.exts = &f->exts;
- tc.type = TC_SETUP_CLSFLOWER;
- tc.cls_flower = &offload;
+ tc->type = TC_SETUP_CLSFLOWER;
+ tc->cls_flower = &offload;
+
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
+}
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
+{
+ list_del_rcu(&f->list);
+ if (!tc_skip_hw(f->flags))
+ fl_hw_destroy_filter(tp, f);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fl_destroy_filter);
}
static void fl_destroy_sleepable(struct work_struct *work)
@@ -299,14 +324,12 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
if (!force && !list_empty(&head->filters))
return false;
- list_for_each_entry_safe(f, next, &head->filters, list) {
- fl_hw_destroy_filter(tp, (unsigned long)f);
- list_del_rcu(&f->list);
- call_rcu(&f->rcu, fl_destroy_filter);
- }
+ list_for_each_entry_safe(f, next, &head->filters, list)
+ __fl_delete(tp, f);
__module_get(THIS_MODULE);
call_rcu(&head->rcu, fl_destroy_rcu);
+
return true;
}
@@ -360,6 +383,24 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_SCTP_SRC_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_SCTP_DST_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_SCTP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_SCTP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_FLAGS] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -394,10 +435,43 @@ static void fl_set_key_vlan(struct nlattr **tb,
}
}
+static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
+ u32 *dissector_key, u32 *dissector_mask,
+ u32 flower_flag_bit, u32 dissector_flag_bit)
+{
+ if (flower_mask & flower_flag_bit) {
+ *dissector_mask |= dissector_flag_bit;
+ if (flower_key & flower_flag_bit)
+ *dissector_key |= dissector_flag_bit;
+ }
+}
+
+static int fl_set_key_flags(struct nlattr **tb,
+ u32 *flags_key, u32 *flags_mask)
+{
+ u32 key, mask;
+
+ /* mask is mandatory for flags */
+ if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
+ return -EINVAL;
+
+ key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
+ mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+
+ *flags_key = 0;
+ *flags_mask = 0;
+
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+
+ return 0;
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
__be16 ethertype;
+ int ret = 0;
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FLOWER_INDEV]) {
int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
@@ -439,6 +513,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ mask->control.addr_type = ~0;
fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
&mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
sizeof(key->ipv4.src));
@@ -447,6 +522,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
sizeof(key->ipv4.dst));
} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ mask->control.addr_type = ~0;
fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
&mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
sizeof(key->ipv6.src));
@@ -469,11 +545,39 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
&mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
sizeof(key->tp.dst));
+ } else if (key->basic.ip_proto == IPPROTO_SCTP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
+ &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
+ &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
+ sizeof(key->tp.dst));
+ } else if (key->basic.n_proto == htons(ETH_P_IP) &&
+ key->basic.ip_proto == IPPROTO_ICMP) {
+ fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
+ &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+ sizeof(key->icmp.type));
+ fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+ &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code));
+ } else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ key->basic.ip_proto == IPPROTO_ICMPV6) {
+ fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
+ &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+ sizeof(key->icmp.type));
+ fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE,
+ &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
+ sizeof(key->icmp.code));
}
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ mask->enc_control.addr_type = ~0;
fl_set_key_val(tb, &key->enc_ipv4.src,
TCA_FLOWER_KEY_ENC_IPV4_SRC,
&mask->enc_ipv4.src,
@@ -489,6 +593,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ mask->enc_control.addr_type = ~0;
fl_set_key_val(tb, &key->enc_ipv6.src,
TCA_FLOWER_KEY_ENC_IPV6_SRC,
&mask->enc_ipv6.src,
@@ -505,7 +610,18 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
sizeof(key->enc_key_id.keyid));
- return 0;
+ fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
+ &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
+ sizeof(key->enc_tp.src));
+
+ fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+ &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
+ sizeof(key->enc_tp.dst));
+
+ if (tb[TCA_FLOWER_KEY_FLAGS])
+ ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
+
+ return ret;
}
static bool fl_mask_eq(struct fl_flow_mask *mask1,
@@ -571,7 +687,21 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_PORTS, tp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ICMP, icmp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_VLAN, vlan);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
+ if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
+ FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ enc_control);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
skb_flow_dissector_init(&head->dissector, keys, cnt);
}
@@ -721,21 +851,21 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- err = fl_hw_replace_filter(tp,
- &head->dissector,
- &mask.key,
- &fnew->key,
- &fnew->exts,
- (unsigned long)fnew,
- fnew->flags);
- if (err)
- goto errout;
+ if (!tc_skip_hw(fnew->flags)) {
+ err = fl_hw_replace_filter(tp,
+ &head->dissector,
+ &mask.key,
+ fnew);
+ if (err)
+ goto errout;
+ }
if (fold) {
if (!tc_skip_sw(fold->flags))
rhashtable_remove_fast(&head->ht, &fold->ht_node,
head->ht_params);
- fl_hw_destroy_filter(tp, (unsigned long)fold);
+ if (!tc_skip_hw(fold->flags))
+ fl_hw_destroy_filter(tp, fold);
}
*arg = (unsigned long) fnew;
@@ -764,10 +894,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
if (!tc_skip_sw(f->flags))
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
- list_del_rcu(&f->list);
- fl_hw_destroy_filter(tp, (unsigned long)f);
- tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, fl_destroy_filter);
+ __fl_delete(tp, f);
return 0;
}
@@ -830,6 +957,42 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
return 0;
}
+static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
+ u32 *flower_key, u32 *flower_mask,
+ u32 flower_flag_bit, u32 dissector_flag_bit)
+{
+ if (dissector_mask & dissector_flag_bit) {
+ *flower_mask |= flower_flag_bit;
+ if (dissector_key & dissector_flag_bit)
+ *flower_key |= flower_flag_bit;
+ }
+}
+
+static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
+{
+ u32 key, mask;
+ __be32 _key, _mask;
+ int err;
+
+ if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
+ return 0;
+
+ key = 0;
+ mask = 0;
+
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+
+ _key = cpu_to_be32(key);
+ _mask = cpu_to_be32(mask);
+
+ err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
+ if (err)
+ return err;
+
+ return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
+}
+
static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
@@ -862,7 +1025,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
}
- fl_hw_update_stats(tp, f);
+ if (!tc_skip_hw(f->flags))
+ fl_hw_update_stats(tp, f);
if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
@@ -918,6 +1082,36 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
&mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
sizeof(key->tp.dst))))
goto nla_put_failure;
+ else if (key->basic.ip_proto == IPPROTO_SCTP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
+ &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
+ &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IP) &&
+ key->basic.ip_proto == IPPROTO_ICMP &&
+ (fl_dump_key_val(skb, &key->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+ sizeof(key->icmp.type)) ||
+ fl_dump_key_val(skb, &key->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code))))
+ goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ key->basic.ip_proto == IPPROTO_ICMPV6 &&
+ (fl_dump_key_val(skb, &key->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+ sizeof(key->icmp.type)) ||
+ fl_dump_key_val(skb, &key->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
+ sizeof(key->icmp.code))))
+ goto nla_put_failure;
if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
(fl_dump_key_val(skb, &key->enc_ipv4.src,
@@ -943,7 +1137,20 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
&mask->enc_key_id, TCA_FLOWER_UNSPEC,
- sizeof(key->enc_key_id)))
+ sizeof(key->enc_key_id)) ||
+ fl_dump_key_val(skb, &key->enc_tp.src,
+ TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
+ &mask->enc_tp.src,
+ TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
+ sizeof(key->enc_tp.src)) ||
+ fl_dump_key_val(skb, &key->enc_tp.dst,
+ TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+ &mask->enc_tp.dst,
+ TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
+ sizeof(key->enc_tp.dst)))
+ goto nla_put_failure;
+
+ if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
goto nla_put_failure;
nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
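TCA_FLOWER_KEY_FLAGS carries a big-endian u32 key/mask pair that is translated bit by bit into FLOW_DIS_* dissector flags; fl_set_key_flag() copies a key bit only when the corresponding mask bit is set. A worked example for the one flag wired up so far:

    u32 dkey = 0, dmask = 0;

    fl_set_key_flag(TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT,   /* key  */
                    TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT,   /* mask */
                    &dkey, &dmask,
                    TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT,
                    FLOW_DIS_IS_FRAGMENT);
    /* result: dkey == dmask == FLOW_DIS_IS_FRAGMENT */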
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index f935429bd5ef..b12bc2abea93 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -16,16 +16,11 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
-struct cls_mall_filter {
+struct cls_mall_head {
struct tcf_exts exts;
struct tcf_result res;
u32 handle;
- struct rcu_head rcu;
u32 flags;
-};
-
-struct cls_mall_head {
- struct cls_mall_filter *filter;
struct rcu_head rcu;
};
@@ -33,38 +28,29 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_mall_head *head = rcu_dereference_bh(tp->root);
- struct cls_mall_filter *f = head->filter;
- if (tc_skip_sw(f->flags))
+ if (tc_skip_sw(head->flags))
return -1;
- return tcf_exts_exec(skb, &f->exts, res);
+ return tcf_exts_exec(skb, &head->exts, res);
}
static int mall_init(struct tcf_proto *tp)
{
- struct cls_mall_head *head;
-
- head = kzalloc(sizeof(*head), GFP_KERNEL);
- if (!head)
- return -ENOBUFS;
-
- rcu_assign_pointer(tp->root, head);
-
return 0;
}
-static void mall_destroy_filter(struct rcu_head *head)
+static void mall_destroy_rcu(struct rcu_head *rcu)
{
- struct cls_mall_filter *f = container_of(head, struct cls_mall_filter, rcu);
+ struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
+ rcu);
- tcf_exts_destroy(&f->exts);
-
- kfree(f);
+ tcf_exts_destroy(&head->exts);
+ kfree(head);
}
static int mall_replace_hw_filter(struct tcf_proto *tp,
- struct cls_mall_filter *f,
+ struct cls_mall_head *head,
unsigned long cookie)
{
struct net_device *dev = tp->q->dev_queue->dev;
@@ -74,7 +60,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
offload.type = TC_SETUP_MATCHALL;
offload.cls_mall = &mall_offload;
offload.cls_mall->command = TC_CLSMATCHALL_REPLACE;
- offload.cls_mall->exts = &f->exts;
+ offload.cls_mall->exts = &head->exts;
offload.cls_mall->cookie = cookie;
return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
@@ -82,7 +68,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
}
static void mall_destroy_hw_filter(struct tcf_proto *tp,
- struct cls_mall_filter *f,
+ struct cls_mall_head *head,
unsigned long cookie)
{
struct net_device *dev = tp->q->dev_queue->dev;
@@ -103,29 +89,20 @@ static bool mall_destroy(struct tcf_proto *tp, bool force)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct net_device *dev = tp->q->dev_queue->dev;
- struct cls_mall_filter *f = head->filter;
- if (!force && f)
- return false;
+ if (!head)
+ return true;
- if (f) {
- if (tc_should_offload(dev, tp, f->flags))
- mall_destroy_hw_filter(tp, f, (unsigned long) f);
+ if (tc_should_offload(dev, tp, head->flags))
+ mall_destroy_hw_filter(tp, head, (unsigned long) head);
- call_rcu(&f->rcu, mall_destroy_filter);
- }
- kfree_rcu(head, rcu);
+ call_rcu(&head->rcu, mall_destroy_rcu);
return true;
}
static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
{
- struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct cls_mall_filter *f = head->filter;
-
- if (f && f->handle == handle)
- return (unsigned long) f;
- return 0;
+ return 0UL;
}
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
@@ -134,7 +111,7 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_mall_filter *f,
+ struct cls_mall_head *head,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
@@ -147,11 +124,11 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
return err;
if (tb[TCA_MATCHALL_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
- tcf_bind_filter(tp, &f->res, base);
+ head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+ tcf_bind_filter(tp, &head->res, base);
}
- tcf_exts_change(tp, &f->exts, &e);
+ tcf_exts_change(tp, &head->exts, &e);
return 0;
}
@@ -162,21 +139,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
unsigned long *arg, bool ovr)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct cls_mall_filter *fold = (struct cls_mall_filter *) *arg;
struct net_device *dev = tp->q->dev_queue->dev;
- struct cls_mall_filter *f;
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
+ struct cls_mall_head *new;
u32 flags = 0;
int err;
if (!tca[TCA_OPTIONS])
return -EINVAL;
- if (head->filter)
- return -EBUSY;
-
- if (fold)
- return -EINVAL;
+ if (head)
+ return -EEXIST;
err = nla_parse_nested(tb, TCA_MATCHALL_MAX,
tca[TCA_OPTIONS], mall_policy);
@@ -189,23 +162,23 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (!f)
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
return -ENOBUFS;
- tcf_exts_init(&f->exts, TCA_MATCHALL_ACT, 0);
+ tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
if (!handle)
handle = 1;
- f->handle = handle;
- f->flags = flags;
+ new->handle = handle;
+ new->flags = flags;
- err = mall_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
+ err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr);
if (err)
goto errout;
if (tc_should_offload(dev, tp, flags)) {
- err = mall_replace_hw_filter(tp, f, (unsigned long) f);
+ err = mall_replace_hw_filter(tp, new, (unsigned long) new);
if (err) {
if (tc_skip_sw(flags))
goto errout;
@@ -214,39 +187,29 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
}
}
- *arg = (unsigned long) f;
- rcu_assign_pointer(head->filter, f);
-
+ *arg = (unsigned long) head;
+ rcu_assign_pointer(tp->root, new);
+ if (head)
+ call_rcu(&head->rcu, mall_destroy_rcu);
return 0;
errout:
- kfree(f);
+ kfree(new);
return err;
}
static int mall_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct cls_mall_filter *f = (struct cls_mall_filter *) arg;
- struct net_device *dev = tp->q->dev_queue->dev;
-
- if (tc_should_offload(dev, tp, f->flags))
- mall_destroy_hw_filter(tp, f, (unsigned long) f);
-
- RCU_INIT_POINTER(head->filter, NULL);
- tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, mall_destroy_filter);
- return 0;
+ return -EOPNOTSUPP;
}
static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct cls_mall_filter *f = head->filter;
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) f, arg) < 0)
+ if (arg->fn(tp, (unsigned long) head, arg) < 0)
arg->stop = 1;
skip:
arg->count++;
@@ -255,28 +218,28 @@ skip:
static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct cls_mall_filter *f = (struct cls_mall_filter *) fh;
+ struct cls_mall_head *head = (struct cls_mall_head *) fh;
struct nlattr *nest;
- if (!f)
+ if (!head)
return skb->len;
- t->tcm_handle = f->handle;
+ t->tcm_handle = head->handle;
nest = nla_nest_start(skb, TCA_OPTIONS);
if (!nest)
goto nla_put_failure;
- if (f->res.classid &&
- nla_put_u32(skb, TCA_MATCHALL_CLASSID, f->res.classid))
+ if (head->res.classid &&
+ nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts))
+ if (tcf_exts_dump(skb, &head->exts))
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+ if (tcf_exts_dump_stats(skb, &head->exts) < 0)
goto nla_put_failure;
return skb->len;
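
A minimal sketch of the single-pointer RCU pattern cls_matchall adopts above: readers dereference one head, writers publish a replacement and defer the free to a grace period. The names my_head/my_root are illustrative only, not from the patch.

#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

struct my_head {
	u32 flags;
	struct rcu_head rcu;
};

static struct my_head __rcu *my_root;

static void my_destroy_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct my_head, rcu));
}

static void my_replace(struct my_head *new)
{
	/* caller holds RTNL, as in mall_change() */
	struct my_head *old = rtnl_dereference(my_root);

	rcu_assign_pointer(my_root, new);		/* publish the new head */
	if (old)
		call_rcu(&old->rcu, my_destroy_rcu);	/* free after readers drain */
}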
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index c66ca9400ab4..c1b23e3060b8 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -57,17 +57,20 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
struct xt_action_param acpar;
const struct xt_set_info *set = (const void *) em->data;
struct net_device *dev, *indev = NULL;
+ struct nf_hook_state state = {
+ .net = em->net,
+ };
int ret, network_offset;
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
- acpar.family = NFPROTO_IPV4;
+ state.pf = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
return 0;
acpar.thoff = ip_hdrlen(skb);
break;
case htons(ETH_P_IPV6):
- acpar.family = NFPROTO_IPV6;
+ state.pf = NFPROTO_IPV6;
if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
return 0;
/* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */
@@ -77,9 +80,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
return 0;
}
- acpar.hooknum = 0;
-
- opt.family = acpar.family;
+ opt.family = state.pf;
opt.dim = set->dim;
opt.flags = set->flags;
opt.cmdflags = 0;
@@ -95,9 +96,9 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
if (skb->skb_iif)
indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
- acpar.net = em->net;
- acpar.in = indev ? indev : dev;
- acpar.out = dev;
+ state.in = indev ? indev : dev;
+ state.out = dev;
+ acpar.state = &state;
ret = ip_set_test(set->index, skb, &acpar, &opt);
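
For context, the em_ipset conversion above routes hook metadata through a stacked nf_hook_state that xt_action_param points at, instead of carrying pf/in/out/net itself. A hedged sketch of that shape (fill_acpar is a hypothetical helper):

#include <linux/netfilter/x_tables.h>

static void fill_acpar(struct xt_action_param *acpar,
		       struct nf_hook_state *state, struct net *net,
		       struct net_device *in, struct net_device *out)
{
	state->net = net;
	state->pf = NFPROTO_IPV4;	/* or NFPROTO_IPV6, per skb protocol */
	state->in = in;
	state->out = out;
	acpar->state = state;		/* fields moved out of xt_action_param */
}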
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index a309a07ccb35..41c80b6c3906 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -176,11 +176,12 @@ META_COLLECTOR(int_vlan_tag)
{
unsigned short tag;
- tag = skb_vlan_tag_get(skb);
- if (!tag && __vlan_get_tag(skb, &tag))
- *err = -1;
- else
+ if (skb_vlan_tag_present(skb))
+ dst->value = skb_vlan_tag_get(skb);
+ else if (!__vlan_get_tag(skb, &tag))
dst->value = tag;
+ else
+ *err = -1;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 206dc24add3a..d7b93429f0cc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -960,6 +960,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
sch->handle = handle;
+ /* This exists to keep backward compatibility with a userspace
+ * loophole, which allowed userspace to get the IFF_NO_QUEUE
+ * facility on older kernels by setting tx_queue_len=0 (prior
+ * to qdisc init) and then forgetting to reinit tx_queue_len
+ * before attaching a qdisc again.
+ */
+ if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
+ dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+ netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
+ }
+
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
if (qdisc_is_percpu_stats(sch)) {
sch->cpu_bstats =
@@ -1384,7 +1395,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
&d, cpu_bstats, &q->bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
goto nla_put_failure;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index beb554aa8cfb..f1207582cbf3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -122,7 +122,7 @@ struct cbq_class {
psched_time_t penalized;
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
struct tc_cbq_xstats xstats;
struct tcf_proto __rcu *filter_list;
@@ -509,7 +509,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
if (delay) {
ktime_t time;
- time = ktime_set(0, 0);
+ time = 0;
time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
}
@@ -1346,7 +1346,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
return -1;
@@ -1405,7 +1405,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
tcf_destroy_chain(&cl->filter_list);
qdisc_destroy(cl->q);
qdisc_put_rtab(cl->R_tab);
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
if (cl != &q->link)
kfree(cl);
}
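
The same conversion repeats through cbq, drr, hfsc, htb, qfq and qdisc_destroy() below: the embedded gnet_stats_rate_est64 becomes an RCU pointer to a net_rate_estimator, and both teardown and dump take only that pointer. A short sketch of the resulting shape (my_class is illustrative):

#include <net/gen_stats.h>

struct my_class {
	struct gnet_stats_basic_packed bstats;
	struct net_rate_estimator __rcu *rate_est;	/* was gnet_stats_rate_est64 */
};

static void my_class_destroy(struct my_class *cl)
{
	gen_kill_estimator(&cl->rate_est);	/* bstats no longer passed */
}

static int my_class_dump_stats(struct gnet_dump *d, struct my_class *cl)
{
	return gnet_stats_copy_rate_est(d, &cl->rate_est);
}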
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8af5c59eef84..bb4cbdf75004 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -25,7 +25,7 @@ struct drr_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
struct list_head alist;
struct Qdisc *qdisc;
@@ -142,7 +142,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
{
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
qdisc_destroy(cl->qdisc);
kfree(cl);
}
@@ -283,7 +283,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
return -1;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 18e752439f6f..a4f738ac7728 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -136,7 +136,7 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
struct fq_flow *aux;
parent = *p;
- aux = container_of(parent, struct fq_flow, rate_node);
+ aux = rb_entry(parent, struct fq_flow, rate_node);
if (f->time_next_packet >= aux->time_next_packet)
p = &parent->rb_right;
else
@@ -188,7 +188,7 @@ static void fq_gc(struct fq_sched_data *q,
while (*p) {
parent = *p;
- f = container_of(parent, struct fq_flow, fq_node);
+ f = rb_entry(parent, struct fq_flow, fq_node);
if (f->sk == sk)
break;
@@ -245,7 +245,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
skb_orphan(skb);
}
- root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
+ root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
if (q->flows >= (2U << q->fq_trees_log) &&
q->inactive_flows > q->flows/2)
@@ -256,7 +256,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
while (*p) {
parent = *p;
- f = container_of(parent, struct fq_flow, fq_node);
+ f = rb_entry(parent, struct fq_flow, fq_node);
if (f->sk == sk) {
/* socket might have been reallocated, so check
* if its sk_hash is the same.
@@ -424,7 +424,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
q->time_next_delayed_flow = ~0ULL;
while ((p = rb_first(&q->delayed)) != NULL) {
- struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
+ struct fq_flow *f = rb_entry(p, struct fq_flow, rate_node);
if (f->time_next_packet > now) {
q->time_next_delayed_flow = f->time_next_packet;
@@ -563,7 +563,7 @@ static void fq_reset(struct Qdisc *sch)
for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
root = &q->fq_root[idx];
while ((p = rb_first(root)) != NULL) {
- f = container_of(p, struct fq_flow, fq_node);
+ f = rb_entry(p, struct fq_flow, fq_node);
rb_erase(p, root);
fq_flow_purge(f);
@@ -593,20 +593,20 @@ static void fq_rehash(struct fq_sched_data *q,
oroot = &old_array[idx];
while ((op = rb_first(oroot)) != NULL) {
rb_erase(op, oroot);
- of = container_of(op, struct fq_flow, fq_node);
+ of = rb_entry(op, struct fq_flow, fq_node);
if (fq_gc_candidate(of)) {
fcnt++;
kmem_cache_free(fq_flow_cachep, of);
continue;
}
- nroot = &new_array[hash_32((u32)(long)of->sk, new_log)];
+ nroot = &new_array[hash_ptr(of->sk, new_log)];
np = &nroot->rb_node;
parent = NULL;
while (*np) {
parent = *np;
- nf = container_of(parent, struct fq_flow, fq_node);
+ nf = rb_entry(parent, struct fq_flow, fq_node);
BUG_ON(nf->sk == of->sk);
if (nf->sk > of->sk)
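
Two mechanical changes run through the sch_fq hunks above: hash_ptr() replaces hash_32((u32)(long)sk, ...) so all pointer bits are hashed on 64-bit kernels, and rb_entry() replaces open-coded container_of() on rb_node. A tiny sketch with a hypothetical struct flow:

#include <linux/hash.h>
#include <linux/rbtree.h>

struct flow {
	struct rb_node node;
	void *key;
};

static u32 flow_bucket(const struct flow *f, unsigned int bits)
{
	/* folds the whole pointer, unlike hash_32((u32)(long)key, bits) */
	return hash_ptr(f->key, bits);
}

static struct flow *flow_of(struct rb_node *n)
{
	return rb_entry(n, struct flow, node);	/* rbtree container_of() */
}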
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6cfb6e9038c2..6eb9c8e88519 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -709,7 +709,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
- gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
+ gen_kill_estimator(&qdisc->rate_est);
if (ops->reset)
ops->reset(qdisc);
if (ops->destroy)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 000f1d36128e..3ffaa6fb0990 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -114,7 +114,7 @@ struct hfsc_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
struct tcf_proto __rcu *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
unsigned int level; /* class level in hierarchy */
@@ -1091,7 +1091,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
tcf_destroy_chain(&cl->filter_list);
qdisc_destroy(cl->qdisc);
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
if (cl != &q->root)
kfree(cl);
}
@@ -1348,7 +1348,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.rtwork = cl->cl_cumul;
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c798d0de8a9d..760f39e7caee 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -111,7 +111,7 @@ struct htb_class {
unsigned int children;
struct htb_class *parent; /* parent class */
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
/*
* Written often fields
@@ -1145,7 +1145,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
return -1;
@@ -1228,7 +1228,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
WARN_ON(!cl->un.leaf.q);
qdisc_destroy(cl->un.leaf.q);
}
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
tcf_destroy_chain(&cl->filter_list);
kfree(cl);
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 9f7b380cf0a3..bcfadfdea8e0 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -152,7 +152,7 @@ struct netem_skb_cb {
static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
{
- return container_of(rb, struct sk_buff, rbnode);
+ return rb_entry(rb, struct sk_buff, rbnode);
}
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
@@ -627,7 +627,7 @@ deliver:
* from the network (tstamp will be updated).
*/
if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
- skb->tstamp.tv64 = 0;
+ skb->tstamp = 0;
#endif
if (q->qdisc) {
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index ca0516e6f743..f9e712ce2d15 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -137,7 +137,7 @@ struct qfq_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
struct Qdisc *qdisc;
struct list_head alist; /* Link for active-classes list. */
struct qfq_aggregate *agg; /* Parent aggregate. */
@@ -508,7 +508,7 @@ set_change_agg:
new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
if (new_agg == NULL) {
err = -ENOBUFS;
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
goto destroy_class;
}
sch_tree_lock(sch);
@@ -533,7 +533,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
struct qfq_sched *q = qdisc_priv(sch);
qfq_rm_from_agg(q, cl);
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
qdisc_destroy(cl->qdisc);
kfree(cl);
}
@@ -667,7 +667,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL,
&cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 2cd9b4478b92..b0196366d58d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -418,9 +418,6 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
struct teql_master *m = netdev_priv(dev);
struct Qdisc *q;
- if (new_mtu < 68)
- return -EINVAL;
-
q = m->slaves;
if (q) {
do {
@@ -460,6 +457,8 @@ static __init void teql_master_setup(struct net_device *dev)
dev->netdev_ops = &teql_netdev_ops;
dev->type = ARPHRD_VOID;
dev->mtu = 1500;
+ dev->min_mtu = 68;
+ dev->max_mtu = 65535;
dev->tx_queue_len = 100;
dev->flags = IFF_NOARP;
dev->hard_header_len = LL_MAX_HEADER;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index f10d3397f917..d3cc30c25c41 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -700,11 +700,15 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
/* Set the peer's active state. */
peer->state = peer_state;
+ /* Add this peer into the transport hashtable */
+ if (sctp_hash_transport(peer)) {
+ sctp_transport_free(peer);
+ return NULL;
+ }
+
/* Attach the remote transport to our asoc. */
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
- /* Add this peer into the transport hashtable */
- sctp_hash_transport(peer);
/* If we do not yet have a primary path, set one. */
if (!asoc->peer.primary_path) {
@@ -1467,7 +1471,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
* threshold. The idea is to recover slowly, but up
* to the initial advertised window.
*/
- if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
+ if (asoc->rwnd_press) {
int change = min(asoc->pathmtu, asoc->rwnd_press);
asoc->rwnd += change;
asoc->rwnd_press -= change;
@@ -1535,7 +1539,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
asoc->rwnd = 0;
}
} else {
- asoc->rwnd_over = len - asoc->rwnd;
+ asoc->rwnd_over += len - asoc->rwnd;
asoc->rwnd = 0;
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 401c60750b20..1ebc184a0e23 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -292,6 +292,8 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
}
af->from_addr_param(&addr, rawaddr, htons(port), 0);
+ if (sctp_bind_addr_state(bp, &addr) != -1)
+ goto next;
retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
SCTP_ADDR_SRC, gfp);
if (retval) {
@@ -300,6 +302,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
break;
}
+next:
len = ntohs(param->length);
addrs_len -= len;
raw_addr_list += len;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7a1cdf43e49d..615f0ddd41df 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -52,7 +52,6 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
atomic_set(&msg->refcnt, 1);
msg->send_failed = 0;
msg->send_error = 0;
- msg->can_abandon = 0;
msg->can_delay = 1;
msg->expires_at = 0;
INIT_LIST_HEAD(&msg->chunks);
@@ -182,20 +181,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
/* Note: Calculate this outside of the loop, so that all fragments
* have the same expiration.
*/
- if (sinfo->sinfo_timetolive) {
- /* sinfo_timetolive is in milliseconds */
+ if (asoc->peer.prsctp_capable && sinfo->sinfo_timetolive &&
+ (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags) ||
+ !SCTP_PR_POLICY(sinfo->sinfo_flags)))
msg->expires_at = jiffies +
msecs_to_jiffies(sinfo->sinfo_timetolive);
- msg->can_abandon = 1;
-
- pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__,
- msg, msg->expires_at, jiffies);
- }
-
- if (asoc->peer.prsctp_capable &&
- SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
- msg->expires_at =
- jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
/* This is the biggest possible DATA chunk that can fit into
* the packet
@@ -354,18 +344,8 @@ errout:
/* Check whether this message has expired. */
int sctp_chunk_abandoned(struct sctp_chunk *chunk)
{
- if (!chunk->asoc->peer.prsctp_capable ||
- !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
- struct sctp_datamsg *msg = chunk->msg;
-
- if (!msg->can_abandon)
- return 0;
-
- if (time_after(jiffies, msg->expires_at))
- return 1;
-
+ if (!chunk->asoc->peer.prsctp_capable)
return 0;
- }
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
time_after(jiffies, chunk->msg->expires_at)) {
@@ -378,6 +358,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
+ } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
+ chunk->msg->expires_at &&
+ time_after(jiffies, chunk->msg->expires_at)) {
+ return 1;
}
/* PRIO policy is processed by sendmsg, not here */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 1f03065686fe..410ddc1e3443 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -331,7 +331,9 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
* on this endpoint.
*/
if (!ep->base.bind_addr.port)
- goto out;
+ return NULL;
+
+ rcu_read_lock();
t = sctp_epaddr_lookup_transport(ep, paddr);
if (!t)
goto out;
@@ -339,6 +341,7 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
*transport = t;
asoc = t->asoc;
out:
+ rcu_read_unlock();
return asoc;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a01a56ec8b8c..458e506ef84b 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -790,10 +790,9 @@ hit:
/* rhashtable for transport */
struct sctp_hash_cmp_arg {
- const struct sctp_endpoint *ep;
- const union sctp_addr *laddr;
- const union sctp_addr *paddr;
- const struct net *net;
+ const union sctp_addr *paddr;
+ const struct net *net;
+ u16 lport;
};
static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -801,7 +800,6 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
{
struct sctp_transport *t = (struct sctp_transport *)ptr;
const struct sctp_hash_cmp_arg *x = arg->key;
- struct sctp_association *asoc;
int err = 1;
if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
@@ -809,19 +807,10 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
if (!sctp_transport_hold(t))
return err;
- asoc = t->asoc;
- if (!net_eq(sock_net(asoc->base.sk), x->net))
+ if (!net_eq(sock_net(t->asoc->base.sk), x->net))
+ goto out;
+ if (x->lport != htons(t->asoc->base.bind_addr.port))
goto out;
- if (x->ep) {
- if (x->ep != asoc->ep)
- goto out;
- } else {
- if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
- goto out;
- if (!sctp_bind_addr_match(&asoc->base.bind_addr,
- x->laddr, sctp_sk(asoc->base.sk)))
- goto out;
- }
err = 0;
out:
@@ -851,11 +840,9 @@ static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
const struct sctp_hash_cmp_arg *x = data;
const union sctp_addr *paddr = x->paddr;
const struct net *net = x->net;
- u16 lport;
+ u16 lport = x->lport;
u32 addr;
- lport = x->ep ? htons(x->ep->base.bind_addr.port) :
- x->laddr->v4.sin_port;
if (paddr->sa.sa_family == AF_INET6)
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
else
@@ -875,29 +862,32 @@ static const struct rhashtable_params sctp_hash_params = {
int sctp_transport_hashtable_init(void)
{
- return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params);
+ return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params);
}
void sctp_transport_hashtable_destroy(void)
{
- rhashtable_destroy(&sctp_transport_hashtable);
+ rhltable_destroy(&sctp_transport_hashtable);
}
-void sctp_hash_transport(struct sctp_transport *t)
+int sctp_hash_transport(struct sctp_transport *t)
{
struct sctp_hash_cmp_arg arg;
+ int err;
if (t->asoc->temp)
- return;
+ return 0;
- arg.ep = t->asoc->ep;
- arg.paddr = &t->ipaddr;
arg.net = sock_net(t->asoc->base.sk);
+ arg.paddr = &t->ipaddr;
+ arg.lport = htons(t->asoc->base.bind_addr.port);
-reinsert:
- if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg,
- &t->node, sctp_hash_params) == -EBUSY)
- goto reinsert;
+ err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
+ &t->node, sctp_hash_params);
+ if (err)
+ pr_err_once("insert transport fail, errno %d\n", err);
+
+ return err;
}
void sctp_unhash_transport(struct sctp_transport *t)
@@ -905,39 +895,62 @@ void sctp_unhash_transport(struct sctp_transport *t)
if (t->asoc->temp)
return;
- rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
- sctp_hash_params);
+ rhltable_remove(&sctp_transport_hashtable, &t->node,
+ sctp_hash_params);
}
+/* return a transport with a reference held on it */
struct sctp_transport *sctp_addrs_lookup_transport(
struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr)
{
+ struct rhlist_head *tmp, *list;
+ struct sctp_transport *t;
struct sctp_hash_cmp_arg arg = {
- .ep = NULL,
- .laddr = laddr,
.paddr = paddr,
.net = net,
+ .lport = laddr->v4.sin_port,
};
- return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
- sctp_hash_params);
+ list = rhltable_lookup(&sctp_transport_hashtable, &arg,
+ sctp_hash_params);
+
+ rhl_for_each_entry_rcu(t, tmp, list, node) {
+ if (!sctp_transport_hold(t))
+ continue;
+
+ if (sctp_bind_addr_match(&t->asoc->base.bind_addr,
+ laddr, sctp_sk(t->asoc->base.sk)))
+ return t;
+ sctp_transport_put(t);
+ }
+
+ return NULL;
}
+/* return a transport without holding a reference, as it's only used under the sock lock */
struct sctp_transport *sctp_epaddr_lookup_transport(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr)
{
struct net *net = sock_net(ep->base.sk);
+ struct rhlist_head *tmp, *list;
+ struct sctp_transport *t;
struct sctp_hash_cmp_arg arg = {
- .ep = ep,
.paddr = paddr,
.net = net,
+ .lport = htons(ep->base.bind_addr.port),
};
- return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
- sctp_hash_params);
+ list = rhltable_lookup(&sctp_transport_hashtable, &arg,
+ sctp_hash_params);
+
+ rhl_for_each_entry_rcu(t, tmp, list, node)
+ if (ep == t->asoc->ep)
+ return t;
+
+ return NULL;
}
/* Look up an association. */
@@ -951,7 +964,7 @@ static struct sctp_association *__sctp_lookup_association(
struct sctp_association *asoc = NULL;
t = sctp_addrs_lookup_transport(net, local, peer);
- if (!t || !sctp_transport_hold(t))
+ if (!t)
goto out;
asoc = t->asoc;
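
The transport hash switch above moves from rhashtable, which keeps one entry per key, to rhltable, which chains all entries sharing a {net, lport, paddr} key; lookups now return a list the caller filters. A hedged sketch of the idiom (lookup_one is hypothetical; the table and params names are from the patch):

static struct sctp_transport *lookup_one(struct sctp_hash_cmp_arg *arg)
{
	struct rhlist_head *list, *tmp;
	struct sctp_transport *t;

	/* caller runs under rcu_read_lock() */
	list = rhltable_lookup(&sctp_transport_hashtable, arg,
			       sctp_hash_params);

	rhl_for_each_entry_rcu(t, tmp, list, node) {
		if (!sctp_transport_hold(t))	/* skip entries being freed */
			continue;
		return t;	/* real callers match laddr or ep first */
	}
	return NULL;
}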
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 176af3080a2b..64dfd35ccdcc 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -71,7 +71,7 @@
#include <net/inet_ecn.h>
#include <net/sctp/sctp.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
union sctp_addr *s2);
@@ -222,7 +222,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
rcu_read_lock();
- res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass);
+ res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 7e869d0cca69..4f5a2b580aa5 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -68,7 +68,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
goto out;
}
- segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
+ segs = skb_segment(skb, features | NETIF_F_HW_CSUM | NETIF_F_SG);
if (IS_ERR(segs))
goto out;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 6cb0df859195..f5320a87341e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -399,186 +399,72 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
atomic_inc(&sk->sk_wmem_alloc);
}
-/* All packets are sent to the network through this function from
- * sctp_outq_tail().
- *
- * The return value is a normal kernel error return value.
- */
-int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
+static int sctp_packet_pack(struct sctp_packet *packet,
+ struct sk_buff *head, int gso, gfp_t gfp)
{
struct sctp_transport *tp = packet->transport;
- struct sctp_association *asoc = tp->asoc;
- struct sctphdr *sh;
- struct sk_buff *nskb = NULL, *head = NULL;
+ struct sctp_auth_chunk *auth = NULL;
struct sctp_chunk *chunk, *tmp;
- struct sock *sk;
- int err = 0;
- int padding; /* How much padding do we need? */
- int pkt_size;
- __u8 has_data = 0;
- int gso = 0;
- int pktcount = 0;
+ int pkt_count = 0, pkt_size;
+ struct sock *sk = head->sk;
+ struct sk_buff *nskb;
int auth_len = 0;
- struct dst_entry *dst;
- unsigned char *auth = NULL; /* pointer to auth in skb data */
-
- pr_debug("%s: packet:%p\n", __func__, packet);
-
- /* Do NOT generate a chunkless packet. */
- if (list_empty(&packet->chunk_list))
- return err;
-
- /* Set up convenience variables... */
- chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
- sk = chunk->skb->sk;
- /* Allocate the head skb, or main one if not in GSO */
- if (packet->size > tp->pathmtu && !packet->ipfragok) {
- if (sk_can_gso(sk)) {
- gso = 1;
- pkt_size = packet->overhead;
- } else {
- /* If this happens, we trash this packet and try
- * to build a new one, hopefully correct this
- * time. Application may notice this error.
- */
- pr_err_once("Trying to GSO but underlying device doesn't support it.");
- goto err;
- }
- } else {
- pkt_size = packet->size;
- }
- head = alloc_skb(pkt_size + MAX_HEADER, gfp);
- if (!head)
- goto err;
if (gso) {
- NAPI_GRO_CB(head)->last = head;
skb_shinfo(head)->gso_type = sk->sk_gso_type;
+ NAPI_GRO_CB(head)->last = head;
+ } else {
+ nskb = head;
+ pkt_size = packet->size;
+ goto merge;
}
- /* Make sure the outbound skb has enough header room reserved. */
- skb_reserve(head, packet->overhead + MAX_HEADER);
-
- /* Set the owning socket so that we know where to get the
- * destination IP address.
- */
- sctp_packet_set_owner_w(head, sk);
-
- if (!sctp_transport_dst_check(tp)) {
- sctp_transport_route(tp, NULL, sctp_sk(sk));
- if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) {
- sctp_assoc_sync_pmtu(sk, asoc);
- }
- }
- dst = dst_clone(tp->dst);
- if (!dst) {
- if (asoc)
- IP_INC_STATS(sock_net(asoc->base.sk),
- IPSTATS_MIB_OUTNOROUTES);
- goto nodst;
- }
- skb_dst_set(head, dst);
-
- /* Build the SCTP header. */
- sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
- skb_reset_transport_header(head);
- sh->source = htons(packet->source_port);
- sh->dest = htons(packet->destination_port);
-
- /* From 6.8 Adler-32 Checksum Calculation:
- * After the packet is constructed (containing the SCTP common
- * header and one or more control or DATA chunks), the
- * transmitter shall:
- *
- * 1) Fill in the proper Verification Tag in the SCTP common
- * header and initialize the checksum field to 0's.
- */
- sh->vtag = htonl(packet->vtag);
- sh->checksum = 0;
-
- pr_debug("***sctp_transmit_packet***\n");
-
do {
- /* Set up convenience variables... */
- chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
- pktcount++;
-
- /* Calculate packet size, so it fits in PMTU. Leave
- * other chunks for the next packets.
- */
- if (gso) {
- pkt_size = packet->overhead;
- list_for_each_entry(chunk, &packet->chunk_list, list) {
- int padded = SCTP_PAD4(chunk->skb->len);
-
- if (chunk == packet->auth)
- auth_len = padded;
- else if (auth_len + padded + packet->overhead >
- tp->pathmtu)
- goto nomem;
- else if (pkt_size + padded > tp->pathmtu)
- break;
- pkt_size += padded;
- }
+ /* calculate the pkt_size and alloc nskb */
+ pkt_size = packet->overhead;
+ list_for_each_entry_safe(chunk, tmp, &packet->chunk_list,
+ list) {
+ int padded = SCTP_PAD4(chunk->skb->len);
- /* Allocate a new skb. */
- nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
- if (!nskb)
- goto nomem;
-
- /* Make sure the outbound skb has enough header
- * room reserved.
- */
- skb_reserve(nskb, packet->overhead + MAX_HEADER);
- } else {
- nskb = head;
+ if (chunk == packet->auth)
+ auth_len = padded;
+ else if (auth_len + padded + packet->overhead >
+ tp->pathmtu)
+ return 0;
+ else if (pkt_size + padded > tp->pathmtu)
+ break;
+ pkt_size += padded;
}
+ nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
+ if (!nskb)
+ return 0;
+ skb_reserve(nskb, packet->overhead + MAX_HEADER);
- /**
- * 3.2 Chunk Field Descriptions
- *
- * The total length of a chunk (including Type, Length and
- * Value fields) MUST be a multiple of 4 bytes. If the length
- * of the chunk is not a multiple of 4 bytes, the sender MUST
- * pad the chunk with all zero bytes and this padding is not
- * included in the chunk length field. The sender should
- * never pad with more than 3 bytes.
- *
- * [This whole comment explains SCTP_PAD4() below.]
- */
-
+merge:
+ /* merge chunks into nskb and append nskb into head list */
pkt_size -= packet->overhead;
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
+ int padding;
+
list_del_init(&chunk->list);
if (sctp_chunk_is_data(chunk)) {
- /* 6.3.1 C4) When data is in flight and when allowed
- * by rule C5, a new RTT measurement MUST be made each
- * round trip. Furthermore, new RTT measurements
- * SHOULD be made no more than once per round-trip
- * for a given destination transport address.
- */
-
- if (!chunk->resent && !tp->rto_pending) {
+ if (!sctp_chunk_retransmitted(chunk) &&
+ !tp->rto_pending) {
chunk->rtt_in_progress = 1;
tp->rto_pending = 1;
}
-
- has_data = 1;
}
padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len;
if (padding)
memset(skb_put(chunk->skb, padding), 0, padding);
- /* if this is the auth chunk that we are adding,
- * store pointer where it will be added and put
- * the auth into the packet.
- */
if (chunk == packet->auth)
- auth = skb_tail_pointer(nskb);
+ auth = (struct sctp_auth_chunk *)
+ skb_tail_pointer(nskb);
- memcpy(skb_put(nskb, chunk->skb->len),
- chunk->skb->data, chunk->skb->len);
+ memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data,
+ chunk->skb->len);
pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
chunk,
@@ -588,11 +474,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
ntohs(chunk->chunk_hdr->length), chunk->skb->len,
chunk->rtt_in_progress);
- /* If this is a control chunk, this is our last
- * reference. Free data chunks after they've been
- * acknowledged or have failed.
- * Re-queue auth chunks if needed.
- */
pkt_size -= SCTP_PAD4(chunk->skb->len);
if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
@@ -602,160 +483,161 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
break;
}
- /* SCTP-AUTH, Section 6.2
- * The sender MUST calculate the MAC as described in RFC2104 [2]
- * using the hash function H as described by the MAC Identifier and
- * the shared association key K based on the endpoint pair shared key
- * described by the shared key identifier. The 'data' used for the
- * computation of the AUTH-chunk is given by the AUTH chunk with its
- * HMAC field set to zero (as shown in Figure 6) followed by all
- * chunks that are placed after the AUTH chunk in the SCTP packet.
- */
- if (auth)
- sctp_auth_calculate_hmac(asoc, nskb,
- (struct sctp_auth_chunk *)auth,
- gfp);
-
- if (packet->auth) {
- if (!list_empty(&packet->chunk_list)) {
- /* We will generate more packets, so re-queue
- * auth chunk.
- */
+ if (auth) {
+ sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
+ /* free auth if no more chunks, or add it back */
+ if (list_empty(&packet->chunk_list))
+ sctp_chunk_free(packet->auth);
+ else
list_add(&packet->auth->list,
&packet->chunk_list);
- } else {
- sctp_chunk_free(packet->auth);
- packet->auth = NULL;
- }
}
- if (!gso)
- break;
-
- if (skb_gro_receive(&head, nskb)) {
- kfree_skb(nskb);
- goto nomem;
+ if (gso) {
+ if (skb_gro_receive(&head, nskb)) {
+ kfree_skb(nskb);
+ return 0;
+ }
+ if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
+ sk->sk_gso_max_segs))
+ return 0;
}
- nskb = NULL;
- if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
- sk->sk_gso_max_segs))
- goto nomem;
+
+ pkt_count++;
} while (!list_empty(&packet->chunk_list));
- /* 2) Calculate the Adler-32 checksum of the whole packet,
- * including the SCTP common header and all the
- * chunks.
- *
- * Note: Adler-32 is no longer applicable, as has been replaced
- * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
- *
- * If it's a GSO packet, it's postponed to sctp_skb_segment.
- */
- if (!sctp_checksum_disable || gso) {
- if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
- dst_xfrm(dst) || packet->ipfragok)) {
- sh->checksum = sctp_compute_cksum(head, 0);
- } else {
- /* no need to seed pseudo checksum for SCTP */
- head->ip_summed = CHECKSUM_PARTIAL;
- head->csum_start = skb_transport_header(head) - head->head;
- head->csum_offset = offsetof(struct sctphdr, checksum);
+ if (gso) {
+ memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
+ sizeof(struct inet6_skb_parm)));
+ skb_shinfo(head)->gso_segs = pkt_count;
+ skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
+ rcu_read_lock();
+ if (skb_dst(head) != tp->dst) {
+ dst_hold(tp->dst);
+ sk_setup_caps(sk, tp->dst);
}
+ rcu_read_unlock();
+ goto chksum;
}
- /* IP layer ECN support
- * From RFC 2481
- * "The ECN-Capable Transport (ECT) bit would be set by the
- * data sender to indicate that the end-points of the
- * transport protocol are ECN-capable."
- *
- * Now setting the ECT bit all the time, as it should not cause
- * any problems protocol-wise even if our peer ignores it.
- *
- * Note: The works for IPv6 layer checks this bit too later
- * in transmission. See IP6_ECN_flow_xmit().
- */
- tp->af_specific->ecn_capable(sk);
+ if (sctp_checksum_disable)
+ return 1;
- /* Set up the IP options. */
- /* BUG: not implemented
- * For v4 this all lives somewhere in sk->sk_opt...
- */
+ if (!(skb_dst(head)->dev->features & NETIF_F_SCTP_CRC) ||
+ dst_xfrm(skb_dst(head)) || packet->ipfragok) {
+ struct sctphdr *sh =
+ (struct sctphdr *)skb_transport_header(head);
- /* Dump that on IP! */
- if (asoc) {
- asoc->stats.opackets += pktcount;
- if (asoc->peer.last_sent_to != tp)
- /* Considering the multiple CPU scenario, this is a
- * "correcter" place for last_sent_to. --xguo
- */
- asoc->peer.last_sent_to = tp;
+ sh->checksum = sctp_compute_cksum(head, 0);
+ } else {
+chksum:
+ head->ip_summed = CHECKSUM_PARTIAL;
+ head->csum_start = skb_transport_header(head) - head->head;
+ head->csum_offset = offsetof(struct sctphdr, checksum);
}
- if (has_data) {
- struct timer_list *timer;
- unsigned long timeout;
+ return pkt_count;
+}
+
+/* All packets are sent to the network through this function from
+ * sctp_outq_tail().
+ *
+ * The return value is always 0 for now.
+ */
+int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
+{
+ struct sctp_transport *tp = packet->transport;
+ struct sctp_association *asoc = tp->asoc;
+ struct sctp_chunk *chunk, *tmp;
+ int pkt_count, gso = 0;
+ struct dst_entry *dst;
+ struct sk_buff *head;
+ struct sctphdr *sh;
+ struct sock *sk;
- /* Restart the AUTOCLOSE timer when sending data. */
- if (sctp_state(asoc, ESTABLISHED) &&
- asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
- timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
- timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
+ pr_debug("%s: packet:%p\n", __func__, packet);
+ if (list_empty(&packet->chunk_list))
+ return 0;
+ chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
+ sk = chunk->skb->sk;
- if (!mod_timer(timer, jiffies + timeout))
- sctp_association_hold(asoc);
+ /* check gso */
+ if (packet->size > tp->pathmtu && !packet->ipfragok) {
+ if (!sk_can_gso(sk)) {
+ pr_err_once("Trying to GSO but underlying device doesn't support it.");
+ goto out;
}
+ gso = 1;
+ }
+
+ /* alloc head skb */
+ head = alloc_skb((gso ? packet->overhead : packet->size) +
+ MAX_HEADER, gfp);
+ if (!head)
+ goto out;
+ skb_reserve(head, packet->overhead + MAX_HEADER);
+ sctp_packet_set_owner_w(head, sk);
+
+ /* set sctp header */
+ sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
+ skb_reset_transport_header(head);
+ sh->source = htons(packet->source_port);
+ sh->dest = htons(packet->destination_port);
+ sh->vtag = htonl(packet->vtag);
+ sh->checksum = 0;
+
+ /* update dst if needed */
+ if (!sctp_transport_dst_check(tp)) {
+ sctp_transport_route(tp, NULL, sctp_sk(sk));
+ if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(sk, asoc);
}
+ dst = dst_clone(tp->dst);
+ if (!dst) {
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+ kfree_skb(head);
+ goto out;
+ }
+ skb_dst_set(head, dst);
+ /* pack up chunks */
+ pkt_count = sctp_packet_pack(packet, head, gso, gfp);
+ if (!pkt_count) {
+ kfree_skb(head);
+ goto out;
+ }
pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
- if (gso) {
- /* Cleanup our debris for IP stacks */
- memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
- sizeof(struct inet6_skb_parm)));
+ /* start autoclose timer */
+ if (packet->has_data && sctp_state(asoc, ESTABLISHED) &&
+ asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
+ struct timer_list *timer =
+ &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
+ unsigned long timeout =
+ asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
- skb_shinfo(head)->gso_segs = pktcount;
- skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
+ if (!mod_timer(timer, jiffies + timeout))
+ sctp_association_hold(asoc);
+ }
- /* We have to refresh this in case we are xmiting to
- * more than one transport at a time
- */
- rcu_read_lock();
- if (__sk_dst_get(sk) != tp->dst) {
- dst_hold(tp->dst);
- sk_setup_caps(sk, tp->dst);
- }
- rcu_read_unlock();
+ /* sctp xmit */
+ tp->af_specific->ecn_capable(sk);
+ if (asoc) {
+ asoc->stats.opackets += pkt_count;
+ if (asoc->peer.last_sent_to != tp)
+ asoc->peer.last_sent_to = tp;
}
head->ignore_df = packet->ipfragok;
tp->af_specific->sctp_xmit(head, tp);
- goto out;
-
-nomem:
- if (packet->auth && list_empty(&packet->auth->list))
- sctp_chunk_free(packet->auth);
-
-nodst:
- /* FIXME: Returning the 'err' will effect all the associations
- * associated with a socket, although only one of the paths of the
- * association is unreachable.
- * The real failure of a transport or association can be passed on
- * to the user via notifications. So setting this error may not be
- * required.
- */
- /* err = -EHOSTUNREACH; */
- kfree_skb(head);
-err:
+out:
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
list_del_init(&chunk->list);
if (!sctp_chunk_is_data(chunk))
sctp_chunk_free(chunk);
}
-
-out:
sctp_packet_reset(packet);
- return err;
+ return 0;
}
/********************************************************************
@@ -871,9 +753,6 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
rwnd = 0;
asoc->peer.rwnd = rwnd;
- /* Has been accepted for transmission. */
- if (!asoc->peer.prsctp_capable)
- chunk->msg->can_abandon = 0;
sctp_chunk_assign_tsn(chunk);
sctp_chunk_assign_ssn(chunk);
}
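
A condensed, non-authoritative outline of the control flow the rewritten transmit path above settles into, as a reading aid rather than an implementation:

int transmit_outline(struct sctp_packet *packet)
{
	/* 1. decide GSO: packet exceeds PMTU and fragmentation is off */
	/* 2. alloc head skb, reserve headroom, push and fill sctphdr */
	/* 3. sctp_packet_pack(): size one nskb per PMTU, merge chunks,
	 *    compute the AUTH HMAC, skb_gro_receive() nskbs into head */
	/* 4. checksum: software CRC32c unless offloaded or GSO-deferred */
	/* 5. restart the autoclose timer if data went out, then xmit */
	return 0;	/* path failures surface via notifications, not here */
}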
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 582585393d35..34efaa4ef2f6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -507,8 +507,6 @@ void sctp_retransmit_mark(struct sctp_outq *q,
transport->rto_pending = 0;
}
- chunk->resent = 1;
-
/* Move the chunk to the retransmit queue. The chunks
* on the retransmit queue are always kept in order.
*/
@@ -1050,7 +1048,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
(new_transport->state == SCTP_PF)))
new_transport = asoc->peer.active_path;
if (new_transport->state == SCTP_UNCONFIRMED) {
- WARN_ONCE(1, "Atempt to send packet on unconfirmed path.");
+ WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
sctp_chunk_fail(chunk, 0);
sctp_chunk_free(chunk);
continue;
@@ -1439,7 +1437,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
* instance).
*/
if (!tchunk->tsn_gap_acked &&
- !tchunk->resent &&
+ !sctp_chunk_retransmitted(tchunk) &&
tchunk->rtt_in_progress) {
tchunk->rtt_in_progress = 0;
rtt = jiffies - tchunk->sent_at;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 7b523e3f551f..616a9428e0c4 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -205,26 +205,30 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
if (!addr->valid)
continue;
- if (sctp_in_scope(net, &addr->a, scope)) {
- /* Now that the address is in scope, check to see if
- * the address type is really supported by the local
- * sock as well as the remote peer.
- */
- if ((((AF_INET == addr->a.sa.sa_family) &&
- (copy_flags & SCTP_ADDR4_PEERSUPP))) ||
- (((AF_INET6 == addr->a.sa.sa_family) &&
- (copy_flags & SCTP_ADDR6_ALLOWED) &&
- (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
- error = sctp_add_bind_addr(bp, &addr->a,
- sizeof(addr->a),
- SCTP_ADDR_SRC, GFP_ATOMIC);
- if (error)
- goto end_copy;
- }
- }
+ if (!sctp_in_scope(net, &addr->a, scope))
+ continue;
+
+ /* Now that the address is in scope, check to see if
+ * the address type is really supported by the local
+ * sock as well as the remote peer.
+ */
+ if (addr->a.sa.sa_family == AF_INET &&
+ !(copy_flags & SCTP_ADDR4_PEERSUPP))
+ continue;
+ if (addr->a.sa.sa_family == AF_INET6 &&
+ (!(copy_flags & SCTP_ADDR6_ALLOWED) ||
+ !(copy_flags & SCTP_ADDR6_PEERSUPP)))
+ continue;
+
+ if (sctp_bind_addr_state(bp, &addr->a) != -1)
+ continue;
+
+ error = sctp_add_bind_addr(bp, &addr->a, sizeof(addr->a),
+ SCTP_ADDR_SRC, GFP_ATOMIC);
+ if (error)
+ break;
}
-end_copy:
rcu_read_unlock();
return error;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f23ad913dc7a..37eeab7899fc 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -235,8 +235,12 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
sctp_assoc_t id)
{
struct sctp_association *addr_asoc = NULL, *id_asoc = NULL;
- struct sctp_transport *transport;
+ struct sctp_af *af = sctp_get_af_specific(addr->ss_family);
union sctp_addr *laddr = (union sctp_addr *)addr;
+ struct sctp_transport *transport;
+
+ if (sctp_verify_addr(sk, laddr, af->sockaddr_len))
+ return NULL;
addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep,
laddr,
@@ -4392,10 +4396,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter)
{
int err;
- err = rhashtable_walk_init(&sctp_transport_hashtable, iter,
- GFP_KERNEL);
- if (err)
- return err;
+ rhltable_walk_enter(&sctp_transport_hashtable, iter);
err = rhashtable_walk_start(iter);
if (err && err != -EAGAIN) {
@@ -4475,18 +4476,17 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
const union sctp_addr *paddr, void *p)
{
struct sctp_transport *transport;
- int err = -ENOENT;
+ int err;
rcu_read_lock();
transport = sctp_addrs_lookup_transport(net, laddr, paddr);
- if (!transport || !sctp_transport_hold(transport))
- goto out;
-
rcu_read_unlock();
+ if (!transport)
+ return -ENOENT;
+
err = cb(transport, p);
sctp_transport_put(transport);
-out:
return err;
}
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ce54dce13ddb..a1652ab63918 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
*/
peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
- peer->last_time_heard = ktime_set(0, 0);
+ peer->last_time_heard = 0;
peer->last_time_ecne_reduced = jiffies;
peer->param_flags = SPP_HB_DISABLE |
diff --git a/net/socket.c b/net/socket.c
index 73dc69f9681e..0758e13754e2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -90,7 +90,7 @@
#include <linux/slab.h>
#include <linux/xattr.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <net/compat.h>
@@ -533,8 +533,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
return used;
}
+static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ int err = simple_setattr(dentry, iattr);
+
+ if (!err && (iattr->ia_valid & ATTR_UID)) {
+ struct socket *sock = SOCKET_I(d_inode(dentry));
+
+ sock->sk->sk_uid = iattr->ia_uid;
+ }
+
+ return err;
+}
+
static const struct inode_operations sockfs_inode_ops = {
.listxattr = sockfs_listxattr,
+ .setattr = sockfs_setattr,
};
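
With sockfs_setattr() wired into sockfs_inode_ops, fchown(2) on a socket descriptor now propagates the new owner into sk->sk_uid. A userspace illustration (my own sketch, not part of the patch):

#include <sys/types.h>
#include <unistd.h>

int retag_socket(int sfd, uid_t uid)
{
	/* ends up in sockfs_setattr(), which copies ia_uid to sk_uid */
	return fchown(sfd, uid, (gid_t)-1);
}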
/**
@@ -654,7 +668,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
- if (need_software_tstamp && skb->tstamp.tv64 == 0)
+ if (need_software_tstamp && skb->tstamp == 0)
__net_timestamp(skb);
if (need_software_tstamp) {
@@ -679,9 +693,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
empty = 0;
- if (!empty)
+ if (!empty) {
put_cmsg(msg, SOL_SOCKET,
SCM_TIMESTAMPING, sizeof(tss), &tss);
+
+ if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
+ skb->len, skb->data);
+ }
}
EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
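
With SOF_TIMESTAMPING_OPT_STATS set, the SCM_TIMESTAMPING cmsg above is followed by an SCM_TIMESTAMPING_OPT_STATS control message carrying a TLV stats dump. A userspace sketch of extracting it from a recvmsg(2) result (find_opt_stats is hypothetical; the constant comes from the uapi socket headers):

#include <stddef.h>
#include <sys/socket.h>

static void *find_opt_stats(struct msghdr *msg, size_t *len)
{
	struct cmsghdr *cm;

	for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMPING_OPT_STATS) {
			*len = cm->cmsg_len - CMSG_LEN(0);	/* TLV payload */
			return CMSG_DATA(cm);
		}
	}
	return NULL;
}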
@@ -892,6 +911,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* what to do with it - that's up to the protocol still.
*/
+static struct ns_common *get_net_ns(struct ns_common *ns)
+{
+ return &get_net(container_of(ns, struct net, ns))->ns;
+}
+
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct socket *sock;
@@ -960,6 +984,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
err = dlci_ioctl_hook(cmd, argp);
mutex_unlock(&dlci_ioctl_mutex);
break;
+ case SIOCGSKNS:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ break;
+
+ err = open_related_ns(&net->ns, get_net_ns);
+ break;
default:
err = sock_do_ioctl(net, sock, cmd, arg);
break;
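
The new SIOCGSKNS ioctl hands back a file descriptor referring to the socket's network namespace, gated on CAP_NET_ADMIN in that namespace. A userspace sketch (assuming the constant is exported via linux/sockios.h) pairing it with setns(2):

#define _GNU_SOURCE
#include <sched.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>
#include <unistd.h>

int join_sock_netns(int sfd)
{
	int ret, nsfd = ioctl(sfd, SIOCGSKNS);	/* fails without CAP_NET_ADMIN */

	if (nsfd < 0)
		return -1;
	ret = setns(nsfd, CLONE_NEWNET);	/* enter the socket's netns */
	close(nsfd);
	return ret;
}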
@@ -1887,7 +1918,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
struct sockaddr_storage address;
struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
unsigned char ctl[sizeof(struct cmsghdr) + 20]
- __attribute__ ((aligned(sizeof(__kernel_size_t))));
+ __aligned(sizeof(__kernel_size_t));
/* 20 is size of ipv6_pktinfo */
unsigned char *ctl_buf = ctl;
int ctl_len;
@@ -3110,6 +3141,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSIFVLAN:
case SIOCADDDLCI:
case SIOCDELDLCI:
+ case SIOCGSKNS:
return sock_ioctl(file, cmd, arg);
case SIOCGIFFLAGS:
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3dfd769dc5b5..cdeb1d814833 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -50,7 +50,7 @@
#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/gss_api.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/hashtable.h>
#include "../netns.h"
@@ -541,9 +541,13 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
return gss_new;
gss_msg = gss_add_msg(gss_new);
if (gss_msg == gss_new) {
- int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg);
+ int res;
+ atomic_inc(&gss_msg->count);
+ res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg);
if (res) {
gss_unhash_msg(gss_new);
+ atomic_dec(&gss_msg->count);
+ gss_release_msg(gss_new);
gss_msg = ERR_PTR(res);
}
} else
@@ -836,6 +840,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
warn_gssd();
gss_release_msg(gss_msg);
}
+ gss_release_msg(gss_msg);
}
static void gss_pipe_dentry_destroy(struct dentry *dir,
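
The auth_gss change above is an instance of the take-before-publish refcount rule: pin the object before queueing the upcall, and on failure drop both the queue's reference and the caller's. A generic sketch with hypothetical helpers (obj_get/obj_put/queue_obj/unhash_obj), not the gss code itself:

struct obj;
void obj_get(struct obj *o);
void obj_put(struct obj *o);
void unhash_obj(struct obj *o);
int queue_obj(struct obj *o);

static int publish(struct obj *o)
{
	int err;

	obj_get(o);		/* reference owned by the queued upcall */
	err = queue_obj(o);
	if (err) {
		unhash_obj(o);
		obj_put(o);	/* undo the queue's reference */
		obj_put(o);	/* drop the caller's reference too */
	}
	return err;
}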
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 90115ceefd49..fb39284ec174 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -200,7 +200,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
if (IS_ERR(hmac_md5))
goto out_free_md5;
- req = ahash_request_alloc(md5, GFP_KERNEL);
+ req = ahash_request_alloc(md5, GFP_NOFS);
if (!req)
goto out_free_hmac_md5;
@@ -230,7 +230,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
goto out;
ahash_request_free(req);
- req = ahash_request_alloc(hmac_md5, GFP_KERNEL);
+ req = ahash_request_alloc(hmac_md5, GFP_NOFS);
if (!req)
goto out_free_hmac_md5;
@@ -299,7 +299,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
if (IS_ERR(tfm))
goto out_free_cksum;
- req = ahash_request_alloc(tfm, GFP_KERNEL);
+ req = ahash_request_alloc(tfm, GFP_NOFS);
if (!req)
goto out_free_ahash;
@@ -397,7 +397,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
goto out_free_cksum;
checksumlen = crypto_ahash_digestsize(tfm);
- req = ahash_request_alloc(tfm, GFP_KERNEL);
+ req = ahash_request_alloc(tfm, GFP_NOFS);
if (!req)
goto out_free_ahash;
@@ -963,7 +963,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
}
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
- GFP_KERNEL);
+ GFP_NOFS);
if (!desc) {
dprintk("%s: failed to allocate shash descriptor for '%s'\n",
__func__, kctx->gk5e->cksum_name);
@@ -1030,7 +1030,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
}
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
- GFP_KERNEL);
+ GFP_NOFS);
if (!desc) {
dprintk("%s: failed to allocate shash descriptor for '%s'\n",
__func__, kctx->gk5e->cksum_name);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 60595835317a..7bb2514aadd9 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -451,8 +451,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
goto out_err_free_hmac;
- desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
- GFP_KERNEL);
+ desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), GFP_NOFS);
if (!desc) {
dprintk("%s: failed to allocate hash descriptor for '%s'\n",
__func__, ctx->gk5e->cksum_name);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 45662d7f0943..153082598522 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1489,7 +1489,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
case RPC_GSS_PROC_DESTROY:
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
- rsci->h.expiry_time = get_seconds();
+ rsci->h.expiry_time = seconds_since_boot();
set_bit(CACHE_NEGATIVE, &rsci->h.flags);
if (resv->iov_len + 4 > PAGE_SIZE)
goto drop;
@@ -1548,7 +1548,7 @@ complete:
ret = SVC_COMPLETE;
goto out;
drop:
- ret = SVC_DROP;
+ ret = SVC_CLOSE;
out:
if (rsci)
cache_put(&rsci->h, sn->rsc_cache);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8aabe12201f8..8147e8d56eb2 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -21,7 +21,7 @@
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/string_helpers.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/poll.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 62a482790937..1dc9f3bac099 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -336,6 +336,11 @@ out:
static DEFINE_IDA(rpc_clids);
+void rpc_cleanup_clids(void)
+{
+ ida_destroy(&rpc_clids);
+}
+
static int rpc_alloc_clid(struct rpc_clnt *clnt)
{
int clid;
@@ -1926,6 +1931,8 @@ call_connect_status(struct rpc_task *task)
case -EADDRINUSE:
case -ENOBUFS:
case -EPIPE:
+ xprt_conditional_disconnect(task->tk_rqstp->rq_xprt,
+ task->tk_rqstp->rq_connect_cookie);
if (RPC_IS_SOFTCONN(task))
break;
/* retry with existing socket, after a delay */
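
Note: rpc_cleanup_clids() exists because an IDA caches bitmap pages internally even after every ID has been returned; without an ida_destroy() at module unload those pages leak. A generic sketch of the lifecycle this hunk completes (the example_* names are placeholders):

#include <linux/idr.h>
#include <linux/module.h>

static DEFINE_IDA(example_ids);

static int example_alloc_id(void)
{
	return ida_simple_get(&example_ids, 0, 0, GFP_KERNEL);
}

static void example_free_id(int id)
{
	ida_simple_remove(&example_ids, id);
}

static void __exit example_exit(void)
{
	ida_destroy(&example_ids);	/* releases cached bitmap blocks */
}
module_exit(example_exit);
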
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index df5826876535..394ce523174c 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -34,7 +34,7 @@ struct sunrpc_net {
struct proc_dir_entry *use_gssp_proc;
};
-extern int sunrpc_net_id;
+extern unsigned int sunrpc_net_id;
int ip_map_cache_create(struct net *);
void ip_map_cache_destroy(struct net *);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 2ecb994314c1..caeb01ad2b5a 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -157,15 +157,17 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
spin_lock(&op_metrics->om_lock);
op_metrics->om_ops++;
- op_metrics->om_ntrans += req->rq_ntrans;
+ /* kernel API: om_ops must never become larger than om_ntrans */
+ op_metrics->om_ntrans += max(req->rq_ntrans, 1);
op_metrics->om_timeouts += task->tk_timeouts;
op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
- delta = ktime_sub(req->rq_xtime, task->tk_start);
- op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
-
+ if (ktime_to_ns(req->rq_xtime)) {
+ delta = ktime_sub(req->rq_xtime, task->tk_start);
+ op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
+ }
op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
delta = ktime_sub(now, task->tk_start);
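
Note: the guard on rq_xtime protects the om_queue statistic: a request that was never transmitted still has rq_xtime == 0, and ktime_sub(0, tk_start) would fold a large negative delta into the running total. The check in isolation:

#include <linux/ktime.h>

/* Sketch only: account queue time solely for transmitted requests. */
static ktime_t rpc_queue_delta(ktime_t xtime, ktime_t start)
{
	if (!ktime_to_ns(xtime))
		return ns_to_ktime(0);	/* never sent: nothing to add */
	return ktime_sub(xtime, start);
}
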
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index ee5d3d253102..c73de181467a 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -24,7 +24,7 @@
#include "netns.h"
-int sunrpc_net_id;
+unsigned int sunrpc_net_id;
EXPORT_SYMBOL_GPL(sunrpc_net_id);
static __net_init int sunrpc_init_net(struct net *net)
@@ -119,6 +119,7 @@ out:
static void __exit
cleanup_sunrpc(void)
{
+ rpc_cleanup_clids();
rpcauth_remove_module();
cleanup_socket_xprt();
svc_cleanup_xprt_sock();
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7c8070ec93c8..75f290bddca1 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1155,8 +1155,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
case SVC_DENIED:
goto err_bad_auth;
case SVC_CLOSE:
- if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
- svc_close_xprt(rqstp->rq_xprt);
+ goto close;
case SVC_DROP:
goto dropit;
case SVC_COMPLETE:
@@ -1246,7 +1245,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
sendit:
if (svc_authorise(rqstp))
- goto dropit;
+ goto close;
return 1; /* Caller can now send it */
dropit:
@@ -1254,11 +1253,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
dprintk("svc: svc_process dropit\n");
return 0;
+ close:
+ if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
+ svc_close_xprt(rqstp->rq_xprt);
+ dprintk("svc: svc_process close\n");
+ return 0;
+
err_short_len:
svc_printk(rqstp, "short len %Zd, dropping request\n",
argv->iov_len);
-
- goto dropit; /* drop request */
+ goto close;
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
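
Note: the new close: label gives svc_process_common() a single exit that both withholds the reply and, for temporary (per-client) transports, tears the connection down, so a failed authorization or a short request resets the client instead of leaving it waiting forever. The policy in isolation, assuming verdict is one of the SVC_* codes:

#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>

/* Sketch only: SVC_DROP discards the request; SVC_CLOSE additionally
 * closes a temporary transport so the peer sees a reset, not a hang.
 */
static int svc_drop_or_close(struct svc_rqst *rqstp, int verdict)
{
	if (verdict == SVC_CLOSE &&
	    test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
		svc_close_xprt(rqstp->rq_xprt);
	return 0;	/* either way, no reply is sent */
}
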
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 3bc1d61694cb..9c9db55a0c1e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -799,6 +799,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
dprintk("svc_recv: found XPT_CLOSE\n");
+ if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags))
+ xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
svc_delete_xprt(xprt);
/* Leave XPT_BUSY set on the dead xprt: */
goto out;
@@ -1020,9 +1022,11 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
le = to_be_closed.next;
list_del_init(le);
xprt = list_entry(le, struct svc_xprt, xpt_list);
- dprintk("svc_age_temp_xprts_now: closing %p\n", xprt);
- xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
- svc_close_xprt(xprt);
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ set_bit(XPT_KILL_TEMP, &xprt->xpt_flags);
+ dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n",
+ xprt);
+ svc_xprt_enqueue(xprt);
}
}
EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now);
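
Note: svc_age_temp_xprts_now() previously invoked xpo_kill_temp_xprt() and svc_close_xprt() directly from whichever context noticed the stale server address. The rework defers the teardown to a server thread: flag the transport and enqueue it, and svc_handle_xprt() performs the kill under its usual XPT_BUSY serialization. XPT_KILL_TEMP is introduced by this series. The deferral pattern on its own:

#include <linux/sunrpc/svc_xprt.h>

/* Sketch only: mark the transport and let a server thread, which
 * already serializes on XPT_BUSY, perform the actual close.
 */
static void queue_xprt_for_close(struct svc_xprt *xprt)
{
	set_bit(XPT_CLOSE, &xprt->xpt_flags);
	set_bit(XPT_KILL_TEMP, &xprt->xpt_flags);
	svc_xprt_enqueue(xprt);
}
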
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 69841db1f533..e112da8005b5 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -124,8 +124,7 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister);
#define DN_HASHMAX (1<<DN_HASHBITS)
static struct hlist_head auth_domain_table[DN_HASHMAX];
-static spinlock_t auth_domain_lock =
- __SPIN_LOCK_UNLOCKED(auth_domain_lock);
+static DEFINE_SPINLOCK(auth_domain_lock);
void auth_domain_put(struct auth_domain *dom)
{
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a4bc98265d88..de066acdb34e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -39,9 +39,10 @@
#include <net/checksum.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <trace/events/skb.h>
@@ -129,6 +130,18 @@ static void svc_release_skb(struct svc_rqst *rqstp)
}
}
+static void svc_release_udp_skb(struct svc_rqst *rqstp)
+{
+ struct sk_buff *skb = rqstp->rq_xprt_ctxt;
+
+ if (skb) {
+ rqstp->rq_xprt_ctxt = NULL;
+
+ dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
+ consume_skb(skb);
+ }
+}
+
union svc_pktinfo_u {
struct in_pktinfo pkti;
struct in6_pktinfo pkti6;
@@ -549,7 +562,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
0, 0, MSG_PEEK | MSG_DONTWAIT);
if (err >= 0)
- skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);
+ skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
if (skb == NULL) {
if (err != -EAGAIN) {
@@ -561,7 +574,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
}
len = svc_addr_len(svc_addr(rqstp));
rqstp->rq_addrlen = len;
- if (skb->tstamp.tv64 == 0) {
+ if (skb->tstamp == 0) {
skb->tstamp = ktime_get_real();
/* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
@@ -590,7 +603,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
goto out_free;
}
local_bh_enable();
- skb_free_datagram_locked(svsk->sk_sk, skb);
+ consume_skb(skb);
} else {
/* we can use it in-place */
rqstp->rq_arg.head[0].iov_base = skb->data;
@@ -617,8 +630,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
return len;
out_free:
- trace_kfree_skb(skb, svc_udp_recvfrom);
- skb_free_datagram_locked(svsk->sk_sk, skb);
+ kfree_skb(skb);
return 0;
}
@@ -679,7 +691,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_create = svc_udp_create,
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
- .xpo_release_rqst = svc_release_skb,
+ .xpo_release_rqst = svc_release_udp_skb,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
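
Note: skb_recv_udp() hands back a datagram that was dequeued without taking the socket lock and with the UDP header already pulled, so the matching release is plain consume_skb() (or kfree_skb() on error paths, which keeps drop-monitor tracepoints meaningful) rather than skb_free_datagram_locked(). The receive loop in isolation:

#include <net/udp.h>
#include <linux/skbuff.h>

/* Sketch only: drain a UDP socket with the lockless receive API. */
static void drain_udp_queue(struct sock *sk)
{
	struct sk_buff *skb;
	int err;

	while ((skb = skb_recv_udp(sk, 0, 1, &err)) != NULL) {
		/* ... process skb->data / skb->len ... */
		consume_skb(skb);	/* normal completion, not a drop */
	}
}
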
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index c88d9bc06f5c..8c3936403fea 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -14,7 +14,7 @@
#include <linux/sysctl.h>
#include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h>
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 685e6d225414..9a6be030ca7d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -669,7 +669,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
spin_lock_bh(&xprt->transport_lock);
if (cookie != xprt->connect_cookie)
goto out;
- if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt))
+ if (test_bit(XPRT_CLOSING, &xprt->state))
goto out;
set_bit(XPRT_CLOSE_WAIT, &xprt->state);
/* Try to schedule an autoclose RPC call */
@@ -772,6 +772,7 @@ void xprt_connect(struct rpc_task *task)
if (!xprt_connected(xprt)) {
task->tk_rqstp->rq_bytes_sent = 0;
task->tk_timeout = task->tk_rqstp->rq_timeout;
+ task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
if (test_bit(XPRT_CLOSING, &xprt->state))
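
Note: xprt_connect() now snapshots connect_cookie into each rqst, and call_connect_status() passes that snapshot to xprt_conditional_disconnect(); the cookie acts as a connection generation number, so a disconnect request is ignored if the transport has already reconnected underneath it. The idea in isolation (caller holds xprt->transport_lock, as xprt_conditional_disconnect() does):

#include <linux/sunrpc/xprt.h>

/* Sketch only: a stale cookie means a newer connection exists,
 * so the requested disconnect must not touch it.
 */
static void maybe_force_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
{
	if (cookie != xprt->connect_cookie)
		return;
	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
	/* ... then schedule the autoclose task, as xprt.c does ... */
}
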
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2c472e1b4827..24fedd4b117e 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -55,7 +55,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
if (IS_ERR(rb))
goto out_fail;
req->rl_sendbuf = rb;
- xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size);
+ xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
+ min_t(size_t, size, PAGE_SIZE));
rpcrdma_set_xprtdata(rqst, req);
return 0;
@@ -191,6 +192,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
size_t maxmsg;
maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize);
+ maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 26b26beef2d4..47bed5333c7f 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -101,7 +101,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
struct rpcrdma_frmr *f = &r->frmr;
int rc;
- f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth);
+ f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
if (IS_ERR(f->fr_mr))
goto out_mr_err;
@@ -157,7 +157,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
return rc;
}
- f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG,
+ f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
ia->ri_max_frmr_depth);
if (IS_ERR(f->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
@@ -171,10 +171,6 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
}
/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
- *
- * There's no recovery if this fails. The FRMR is abandoned, but
- * remains in rb_all. It will be cleaned up when the transport is
- * destroyed.
*/
static void
frwr_op_recover_mr(struct rpcrdma_mw *mw)
@@ -210,11 +206,16 @@ static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
+ struct ib_device_attr *attrs = &ia->ri_device->attrs;
int depth, delta;
+ ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
+ if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
+
ia->ri_max_frmr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- ia->ri_device->attrs.max_fast_reg_page_list_len);
+ attrs->max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
__func__, ia->ri_max_frmr_depth);
@@ -241,8 +242,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
}
ep->rep_attr.cap.max_send_wr *= depth;
- if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
- cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
+ if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
+ cdata->max_requests = attrs->max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
@@ -348,6 +349,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
struct rpcrdma_mw *mw;
struct rpcrdma_frmr *frmr;
struct ib_mr *mr;
@@ -383,8 +385,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
++seg;
++i;
-
- /* Check for holes */
+ if (holes_ok)
+ continue;
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
@@ -421,7 +423,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
- DECR_CQCOUNT(&r_xprt->rx_ep);
+ rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc)
goto out_senderr;
@@ -451,26 +453,6 @@ out_senderr:
return -ENOTCONN;
}
-static struct ib_send_wr *
-__frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
-{
- struct rpcrdma_frmr *f = &mw->frmr;
- struct ib_send_wr *invalidate_wr;
-
- dprintk("RPC: %s: invalidating frmr %p\n", __func__, f);
-
- f->fr_state = FRMR_IS_INVALID;
- invalidate_wr = &f->fr_invwr;
-
- memset(invalidate_wr, 0, sizeof(*invalidate_wr));
- f->fr_cqe.done = frwr_wc_localinv;
- invalidate_wr->wr_cqe = &f->fr_cqe;
- invalidate_wr->opcode = IB_WR_LOCAL_INV;
- invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
-
- return invalidate_wr;
-}
-
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
@@ -481,12 +463,12 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
- struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
+ struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_rep *rep = req->rl_reply;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw, *tmp;
struct rpcrdma_frmr *f;
- int rc;
+ int count, rc;
dprintk("RPC: %s: req %p\n", __func__, req);
@@ -496,22 +478,29 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* a single ib_post_send() call.
*/
f = NULL;
- invalidate_wrs = pos = prev = NULL;
+ count = 0;
+ prev = &first;
list_for_each_entry(mw, &req->rl_registered, mw_list) {
+ mw->frmr.fr_state = FRMR_IS_INVALID;
+
if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
- (mw->mw_handle == rep->rr_inv_rkey)) {
- mw->frmr.fr_state = FRMR_IS_INVALID;
+ (mw->mw_handle == rep->rr_inv_rkey))
continue;
- }
-
- pos = __frwr_prepare_linv_wr(mw);
- if (!invalidate_wrs)
- invalidate_wrs = pos;
- else
- prev->next = pos;
- prev = pos;
f = &mw->frmr;
+ dprintk("RPC: %s: invalidating frmr %p\n",
+ __func__, f);
+
+ f->fr_cqe.done = frwr_wc_localinv;
+ last = &f->fr_invwr;
+ memset(last, 0, sizeof(*last));
+ last->wr_cqe = &f->fr_cqe;
+ last->opcode = IB_WR_LOCAL_INV;
+ last->ex.invalidate_rkey = mw->mw_handle;
+ count++;
+
+ *prev = last;
+ prev = &last->next;
}
if (!f)
goto unmap;
@@ -520,17 +509,22 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* last WR in the chain completes, all WRs in the chain
* are complete.
*/
- f->fr_invwr.send_flags = IB_SEND_SIGNALED;
+ last->send_flags = IB_SEND_SIGNALED;
f->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&f->fr_linv_done);
- INIT_CQCOUNT(&r_xprt->rx_ep);
+
+ /* Initialize CQ count, since there is always a signaled
+ * WR being posted here. The new cqcount depends on how
+ * many SQEs are about to be consumed.
+ */
+ rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
*/
r_xprt->rx_stats.local_inv_needed++;
- rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
+ rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (rc)
goto reset_mrs;
@@ -541,7 +535,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*/
unmap:
list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
- dprintk("RPC: %s: unmapping frmr %p\n",
+ dprintk("RPC: %s: DMA unmapping frmr %p\n",
__func__, &mw->frmr);
list_del_init(&mw->mw_list);
ib_dma_unmap_sg(ia->ri_device,
@@ -559,7 +553,7 @@ reset_mrs:
*/
list_for_each_entry(mw, &req->rl_registered, mw_list) {
f = &mw->frmr;
- if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) {
+ if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
__frwr_reset_mr(ia, mw);
bad_wr = bad_wr->next;
}
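
Note: the rewritten frwr_op_unmap_sync() builds its LOCAL_INV chain with a pointer-to-pointer ("prev" always addresses the next-pointer slot to fill), signals only the final WR, and sizes the completion counter by how many WRs it actually posts. The chain-building idiom on its own, with made-up names:

#include <rdma/ib_verbs.h>

/* Sketch only: link n work requests into one chain; only the tail
 * is signaled, so one completion covers the whole chain.
 */
static struct ib_send_wr *chain_local_inv(struct ib_send_wr *wrs, int n)
{
	struct ib_send_wr *first = NULL, **prev = &first;
	int i;

	for (i = 0; i < n; i++) {
		wrs[i].opcode = IB_WR_LOCAL_INV;
		wrs[i].send_flags = 0;
		*prev = &wrs[i];	/* fill the pending next slot */
		prev = &wrs[i].next;
	}
	*prev = NULL;			/* terminate the chain */
	if (n)
		wrs[n - 1].send_flags = IB_SEND_SIGNALED;
	return first;
}
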
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index d987c2d3dd6e..c52e0f2ffe52 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -786,7 +786,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp)
ifdebug(FACILITY) {
u64 off;
xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
- dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
+ dprintk("RPC: %s: chunk %d@0x%016llx:0x%08x\n",
__func__,
be32_to_cpu(seg->rs_length),
(unsigned long long)off,
@@ -906,28 +906,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count;
}
-void
-rpcrdma_connect_worker(struct work_struct *work)
-{
- struct rpcrdma_ep *ep =
- container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
- struct rpcrdma_xprt *r_xprt =
- container_of(ep, struct rpcrdma_xprt, rx_ep);
- struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-
- spin_lock_bh(&xprt->transport_lock);
- if (++xprt->connect_cookie == 0) /* maintain a reserved value */
- ++xprt->connect_cookie;
- if (ep->rep_connected > 0) {
- if (!xprt_test_and_set_connected(xprt))
- xprt_wake_pending_tasks(xprt, 0);
- } else {
- if (xprt_test_and_clear_connected(xprt))
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
- }
- spin_unlock_bh(&xprt->transport_lock);
-}
-
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
@@ -959,18 +937,6 @@ rpcrdma_is_bcall(struct rpcrdma_msg *headerp)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-/*
- * This function is called when an async event is posted to
- * the connection which changes the connection state. All it
- * does at this point is mark the connection up/down, the rpc
- * timers do the rest.
- */
-void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
-{
- schedule_delayed_work(&ep->rep_connect_worker, 0);
-}
-
/* Process received RPC/RDMA messages.
*
* Errors must result in the RPC task either being awakened, or
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 20027f8de129..288e35c2d8f4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -164,13 +164,9 @@ static int
xprt_rdma_bc_allocate(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
- struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
size_t size = rqst->rq_callsize;
- struct svcxprt_rdma *rdma;
struct page *page;
- rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
-
if (size > PAGE_SIZE) {
WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
size);
@@ -359,6 +355,7 @@ xprt_setup_rdma_bc(struct xprt_create *args)
out_fail:
xprt_rdma_free_addresses(xprt);
args->bc_xprt->xpt_bc_xprt = NULL;
+ args->bc_xprt->xpt_bc_xps = NULL;
xprt_put(xprt);
xprt_free(xprt);
return ERR_PTR(-EINVAL);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ad1df979b3f0..172b537f8cfc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -279,7 +279,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
frmr->sg);
return -ENOMEM;
}
- atomic_inc(&xprt->sc_dma_used);
n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
if (unlikely(n != frmr->sg_nents)) {
@@ -348,8 +347,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
atomic_inc(&rdma_stat_read);
return ret;
err:
- ib_dma_unmap_sg(xprt->sc_cm_id->device,
- frmr->sg, frmr->sg_nents, frmr->direction);
svc_rdma_put_context(ctxt, 0);
svc_rdma_put_frmr(xprt, frmr);
return ret;
@@ -374,9 +371,7 @@ rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
u32 position, u32 byte_count, u32 page_offset, int page_no)
{
char *srcp, *destp;
- int ret;
- ret = 0;
srcp = head->arg.head[0].iov_base + position;
byte_count = head->arg.head[0].iov_len - position;
if (byte_count > PAGE_SIZE) {
@@ -415,6 +410,20 @@ done:
return 1;
}
+/* Returns the address of the first read chunk or <nul> if no read chunk
+ * is present
+ */
+static struct rpcrdma_read_chunk *
+svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
+{
+ struct rpcrdma_read_chunk *ch =
+ (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+
+ if (ch->rc_discrim == xdr_zero)
+ return NULL;
+ return ch;
+}
+
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
@@ -627,8 +636,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto defer;
goto out;
}
- dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
- ctxt, rdma_xprt, rqstp, ctxt->wc_status);
+ dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n",
+ ctxt, rdma_xprt, rqstp);
atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index f5a91edcd233..ad4d286a83c5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -153,76 +153,35 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
return dma_addr;
}
-/* Returns the address of the first read chunk or <nul> if no read chunk
- * is present
+/* Parse the RPC Call's transport header.
*/
-struct rpcrdma_read_chunk *
-svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
+static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
+ struct rpcrdma_write_array **write,
+ struct rpcrdma_write_array **reply)
{
- struct rpcrdma_read_chunk *ch =
- (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+ __be32 *p;
- if (ch->rc_discrim == xdr_zero)
- return NULL;
- return ch;
-}
+ p = (__be32 *)&rmsgp->rm_body.rm_chunks[0];
-/* Returns the address of the first read write array element or <nul>
- * if no write array list is present
- */
-static struct rpcrdma_write_array *
-svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
-{
- if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
- rmsgp->rm_body.rm_chunks[1] == xdr_zero)
- return NULL;
- return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
-}
+ /* Read list */
+ while (*p++ != xdr_zero)
+ p += 5;
-/* Returns the address of the first reply array element or <nul> if no
- * reply array is present
- */
-static struct rpcrdma_write_array *
-svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
- struct rpcrdma_write_array *wr_ary)
-{
- struct rpcrdma_read_chunk *rch;
- struct rpcrdma_write_array *rp_ary;
-
- /* XXX: Need to fix when reply chunk may occur with read list
- * and/or write list.
- */
- if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
- rmsgp->rm_body.rm_chunks[1] != xdr_zero)
- return NULL;
-
- rch = svc_rdma_get_read_chunk(rmsgp);
- if (rch) {
- while (rch->rc_discrim != xdr_zero)
- rch++;
-
- /* The reply chunk follows an empty write array located
- * at 'rc_position' here. The reply array is at rc_target.
- */
- rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
- goto found_it;
- }
-
- if (wr_ary) {
- int chunk = be32_to_cpu(wr_ary->wc_nchunks);
-
- rp_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[chunk].wc_target.rs_length;
- goto found_it;
+ /* Write list */
+ if (*p != xdr_zero) {
+ *write = (struct rpcrdma_write_array *)p;
+ while (*p++ != xdr_zero)
+ p += 1 + be32_to_cpu(*p) * 4;
+ } else {
+ *write = NULL;
+ p++;
}
- /* No read list, no write list */
- rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2];
-
- found_it:
- if (rp_ary->wc_discrim == xdr_zero)
- return NULL;
- return rp_ary;
+ /* Reply chunk */
+ if (*p != xdr_zero)
+ *reply = (struct rpcrdma_write_array *)p;
+ else
+ *reply = NULL;
}
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
@@ -240,31 +199,22 @@ static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
{
struct rpcrdma_read_chunk *rd_ary;
struct rpcrdma_segment *arg_ch;
- u32 inv_rkey;
- inv_rkey = 0;
-
- rd_ary = svc_rdma_get_read_chunk(rdma_argp);
- if (rd_ary) {
- inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
- goto out;
- }
+ rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0];
+ if (rd_ary->rc_discrim != xdr_zero)
+ return be32_to_cpu(rd_ary->rc_target.rs_handle);
if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
arg_ch = &wr_ary->wc_array[0].wc_target;
- inv_rkey = be32_to_cpu(arg_ch->rs_handle);
- goto out;
+ return be32_to_cpu(arg_ch->rs_handle);
}
if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
arg_ch = &rp_ary->wc_array[0].wc_target;
- inv_rkey = be32_to_cpu(arg_ch->rs_handle);
- goto out;
+ return be32_to_cpu(arg_ch->rs_handle);
}
-out:
- dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
- return inv_rkey;
+ return 0;
}
/* Assumptions:
@@ -622,8 +572,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
* places this at the start of page 0.
*/
rdma_argp = page_address(rqstp->rq_pages[0]);
- wr_ary = svc_rdma_get_write_array(rdma_argp);
- rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
+ svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary);
inv_rkey = 0;
if (rdma->sc_snd_w_inv)
@@ -636,7 +585,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
goto err0;
inline_bytes = rqstp->rq_res.len;
- /* Create the RDMA response header */
+ /* Create the RDMA response header. xprt->xpt_mutex,
+ * acquired in svc_send(), serializes RPC replies. The
+ * code path below that inserts the credit grant value
+ * into each transport header runs only inside this
+ * critical section.
+ */
ret = -ENOMEM;
res_page = alloc_page(GFP_KERNEL);
if (!res_page)
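
Note: svc_rdma_get_write_arrays() replaces three pointer-chasing helpers with one pass over the XDR-encoded transport header. The stride arithmetic: each read-list entry is a discriminator plus five 32-bit words (rc_position plus a handle/length/64-bit-offset segment); each write array advances by 1 + nchunks * 4 words after its count. The read-list walk in isolation:

#include <linux/sunrpc/xdr.h>

/* Sketch only: step over an RPC-over-RDMA read list. Units are
 * 32-bit XDR words: discriminator + rc_position + 4-word segment.
 */
static __be32 *skip_read_list(__be32 *p)
{
	while (*p++ != xdr_zero)
		p += 5;
	return p;	/* now at the write-list discriminator */
}
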
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 1334de2715c2..ca2799af05a6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -41,6 +41,7 @@
*/
#include <linux/sunrpc/svc_xprt.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/interrupt.h>
@@ -226,25 +227,22 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
struct svcxprt_rdma *xprt = ctxt->xprt;
struct ib_device *device = xprt->sc_cm_id->device;
u32 lkey = xprt->sc_pd->local_dma_lkey;
- unsigned int i, count;
+ unsigned int i;
- for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
+ for (i = 0; i < ctxt->mapped_sges; i++) {
/*
* Unmap the DMA addr in the SGE if the lkey matches
* the local_dma_lkey, otherwise, ignore it since it is
* an FRMR lkey and will be unmapped later when the
* last WR that uses it completes.
*/
- if (ctxt->sge[i].lkey == lkey) {
- count++;
+ if (ctxt->sge[i].lkey == lkey)
ib_dma_unmap_page(device,
ctxt->sge[i].addr,
ctxt->sge[i].length,
ctxt->direction);
- }
}
ctxt->mapped_sges = 0;
- atomic_sub(count, &xprt->sc_dma_used);
}
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -398,7 +396,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
- ctxt->wc_status = wc->status;
svc_rdma_unmap_dma(ctxt);
if (wc->status != IB_WC_SUCCESS)
@@ -436,7 +433,7 @@ static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
goto err;
out:
- atomic_dec(&xprt->sc_sq_count);
+ atomic_inc(&xprt->sc_sq_avail);
wake_up(&xprt->sc_send_wait);
return;
@@ -946,7 +943,6 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
if (frmr) {
ib_dma_unmap_sg(rdma->sc_cm_id->device,
frmr->sg, frmr->sg_nents, frmr->direction);
- atomic_dec(&rdma->sc_dma_used);
spin_lock_bh(&rdma->sc_frmr_q_lock);
WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
@@ -973,6 +969,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rpcrdma_connect_private pmsg;
struct ib_qp_init_attr qp_attr;
struct ib_device *dev;
+ struct sockaddr *sap;
unsigned int i;
int ret = 0;
@@ -1010,6 +1007,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_rq_depth = newxprt->sc_max_requests +
newxprt->sc_max_bc_requests;
newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+ atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
if (!svc_rdma_prealloc_ctxts(newxprt))
goto errout;
@@ -1052,18 +1050,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = newxprt->sc_sq_cq;
qp_attr.recv_cq = newxprt->sc_rq_cq;
- dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n"
- " cm_id->device=%p, sc_pd->device=%p\n"
- " cap.max_send_wr = %d\n"
- " cap.max_recv_wr = %d\n"
- " cap.max_send_sge = %d\n"
- " cap.max_recv_sge = %d\n",
- newxprt->sc_cm_id, newxprt->sc_pd,
- dev, newxprt->sc_pd->device,
- qp_attr.cap.max_send_wr,
- qp_attr.cap.max_recv_wr,
- qp_attr.cap.max_send_sge,
- qp_attr.cap.max_recv_sge);
+ dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n",
+ newxprt->sc_cm_id, newxprt->sc_pd);
+ dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n",
+ qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
+ dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
+ qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
@@ -1146,31 +1138,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
goto errout;
}
- dprintk("svcrdma: new connection %p accepted with the following "
- "attributes:\n"
- " local_ip : %pI4\n"
- " local_port : %d\n"
- " remote_ip : %pI4\n"
- " remote_port : %d\n"
- " max_sge : %d\n"
- " max_sge_rd : %d\n"
- " sq_depth : %d\n"
- " max_requests : %d\n"
- " ord : %d\n",
- newxprt,
- &((struct sockaddr_in *)&newxprt->sc_cm_id->
- route.addr.src_addr)->sin_addr.s_addr,
- ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
- route.addr.src_addr)->sin_port),
- &((struct sockaddr_in *)&newxprt->sc_cm_id->
- route.addr.dst_addr)->sin_addr.s_addr,
- ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
- route.addr.dst_addr)->sin_port),
- newxprt->sc_max_sge,
- newxprt->sc_max_sge_rd,
- newxprt->sc_sq_depth,
- newxprt->sc_max_requests,
- newxprt->sc_ord);
+ dprintk("svcrdma: new connection %p accepted:\n", newxprt);
+ sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
+ dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
+ sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
+ dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
+ dprintk(" max_sge : %d\n", newxprt->sc_max_sge);
+ dprintk(" max_sge_rd : %d\n", newxprt->sc_max_sge_rd);
+ dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
+ dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
+ dprintk(" ord : %d\n", newxprt->sc_ord);
return &newxprt->sc_xprt;
@@ -1257,9 +1234,6 @@ static void __svc_rdma_free(struct work_struct *work)
if (rdma->sc_ctxt_used != 0)
pr_err("svcrdma: ctxt still in use? (%d)\n",
rdma->sc_ctxt_used);
- if (atomic_read(&rdma->sc_dma_used) != 0)
- pr_err("svcrdma: dma still in use? (%d)\n",
- atomic_read(&rdma->sc_dma_used));
/* Final put of backchannel client transport */
if (xprt->xpt_bc_xprt) {
@@ -1339,15 +1313,13 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
/* If the SQ is full, wait until an SQ entry is available */
while (1) {
- spin_lock_bh(&xprt->sc_lock);
- if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
- spin_unlock_bh(&xprt->sc_lock);
+ if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) {
atomic_inc(&rdma_stat_sq_starve);
/* Wait until SQ WR available if SQ still full */
+ atomic_add(wr_count, &xprt->sc_sq_avail);
wait_event(xprt->sc_send_wait,
- atomic_read(&xprt->sc_sq_count) <
- xprt->sc_sq_depth);
+ atomic_read(&xprt->sc_sq_avail) > wr_count);
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
return -ENOTCONN;
continue;
@@ -1357,21 +1329,17 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
svc_xprt_get(&xprt->sc_xprt);
/* Bump used SQ WR count and post */
- atomic_add(wr_count, &xprt->sc_sq_count);
ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
if (ret) {
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- atomic_sub(wr_count, &xprt->sc_sq_count);
for (i = 0; i < wr_count; i ++)
svc_xprt_put(&xprt->sc_xprt);
- dprintk("svcrdma: failed to post SQ WR rc=%d, "
- "sc_sq_count=%d, sc_sq_depth=%d\n",
- ret, atomic_read(&xprt->sc_sq_count),
- xprt->sc_sq_depth);
- }
- spin_unlock_bh(&xprt->sc_lock);
- if (ret)
+ dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret);
+ dprintk(" sc_sq_avail=%d, sc_sq_depth=%d\n",
+ atomic_read(&xprt->sc_sq_avail),
+ xprt->sc_sq_depth);
wake_up(&xprt->sc_send_wait);
+ }
break;
}
return ret;
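
Note: replacing sc_sq_count and its spinlock with sc_sq_avail turns send-queue accounting into one atomic reservation: atomic_sub_return() optimistically claims wr_count slots, a negative result means oversubscription, and the claim is rolled back before sleeping. Completions simply atomic_inc() and wake the waiters, as the wc_common hunk above shows. The reservation loop in isolation:

#include <linux/atomic.h>
#include <linux/wait.h>

/* Sketch only: lock-free SQ slot reservation with rollback. */
static void reserve_sq_slots(atomic_t *sq_avail, int wr_count,
			     wait_queue_head_t *waitq)
{
	while (atomic_sub_return(wr_count, sq_avail) < 0) {
		atomic_add(wr_count, sq_avail);	/* undo, then wait */
		wait_event(*waitq, atomic_read(sq_avail) > wr_count);
	}
}
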
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ed5e285fd2ea..534c178d2a7e 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -219,6 +219,34 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt)
}
}
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
+{
+ schedule_delayed_work(&ep->rep_connect_worker, 0);
+}
+
+void
+rpcrdma_connect_worker(struct work_struct *work)
+{
+ struct rpcrdma_ep *ep =
+ container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
+ struct rpcrdma_xprt *r_xprt =
+ container_of(ep, struct rpcrdma_xprt, rx_ep);
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+
+ spin_lock_bh(&xprt->transport_lock);
+ if (++xprt->connect_cookie == 0) /* maintain a reserved value */
+ ++xprt->connect_cookie;
+ if (ep->rep_connected > 0) {
+ if (!xprt_test_and_set_connected(xprt))
+ xprt_wake_pending_tasks(xprt, 0);
+ } else {
+ if (xprt_test_and_clear_connected(xprt))
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ }
+ spin_unlock_bh(&xprt->transport_lock);
+}
+
static void
xprt_rdma_connect_worker(struct work_struct *work)
{
@@ -621,7 +649,8 @@ xprt_rdma_free(struct rpc_task *task)
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
- ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
+ if (unlikely(!list_empty(&req->rl_registered)))
+ ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
rpcrdma_unmap_sges(ia, req);
rpcrdma_buffer_put(req);
}
@@ -657,7 +686,8 @@ xprt_rdma_send_request(struct rpc_task *task)
int rc = 0;
/* On retransmit, remove any previously registered chunks */
- r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
+ if (unlikely(!list_empty(&req->rl_registered)))
+ r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
rc = rpcrdma_marshal_req(rqst);
if (rc < 0)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index ec74289af7ec..11d07748f699 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -103,9 +103,9 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
struct rpcrdma_ep *ep = context;
- pr_err("RPC: %s: %s on device %s ep %p\n",
- __func__, ib_event_msg(event->event),
- event->device->name, context);
+ pr_err("rpcrdma: %s on device %s ep %p\n",
+ ib_event_msg(event->event), event->device->name, context);
+
if (ep->rep_connected == 1) {
ep->rep_connected = -EIO;
rpcrdma_conn_func(ep);
@@ -223,8 +223,8 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
cdata->inline_rsize = rsize;
if (wsize < cdata->inline_wsize)
cdata->inline_wsize = wsize;
- pr_info("rpcrdma: max send %u, max recv %u\n",
- cdata->inline_wsize, cdata->inline_rsize);
+ dprintk("RPC: %s: max send %u, max recv %u\n",
+ __func__, cdata->inline_wsize, cdata->inline_rsize);
rpcrdma_set_max_header_sizes(r_xprt);
}
@@ -331,6 +331,7 @@ static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
struct rpcrdma_ia *ia, struct sockaddr *addr)
{
+ unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
struct rdma_cm_id *id;
int rc;
@@ -352,8 +353,12 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
__func__, rc);
goto out;
}
- wait_for_completion_interruptible_timeout(&ia->ri_done,
- msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
+ rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
+ if (rc < 0) {
+ dprintk("RPC: %s: wait() exited: %i\n",
+ __func__, rc);
+ goto out;
+ }
/* FIXME:
* Until xprtrdma supports DEVICE_REMOVAL, the provider must
@@ -376,8 +381,12 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
__func__, rc);
goto put;
}
- wait_for_completion_interruptible_timeout(&ia->ri_done,
- msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
+ rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
+ if (rc < 0) {
+ dprintk("RPC: %s: wait() exited: %i\n",
+ __func__, rc);
+ goto put;
+ }
rc = ia->ri_async_rc;
if (rc)
goto put;
@@ -532,7 +541,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
if (ep->rep_cqinit <= 2)
ep->rep_cqinit = 0; /* always signal? */
- INIT_CQCOUNT(ep);
+ rpcrdma_init_cqcount(ep, 0);
init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
@@ -1311,13 +1320,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
dprintk("RPC: %s: posting %d s/g entries\n",
__func__, send_wr->num_sge);
- if (DECR_CQCOUNT(ep) > 0)
- send_wr->send_flags = 0;
- else { /* Provider must take a send completion every now and then */
- INIT_CQCOUNT(ep);
- send_wr->send_flags = IB_SEND_SIGNALED;
- }
-
+ rpcrdma_set_signaled(ep, send_wr);
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
if (rc)
goto out_postsend_err;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 6e1bba358203..e35efd4ac1e4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -75,6 +75,7 @@ struct rpcrdma_ia {
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
bool ri_reminv_expected;
+ enum ib_mr_type ri_mrtype;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
@@ -95,8 +96,24 @@ struct rpcrdma_ep {
struct delayed_work rep_connect_worker;
};
-#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
-#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
+static inline void
+rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
+{
+ atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
+}
+
+/* To update send queue accounting, provider must take a
+ * send completion every now and then.
+ */
+static inline void
+rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
+{
+ send_wr->send_flags = 0;
+ if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
+ rpcrdma_init_cqcount(ep, 0);
+ send_wr->send_flags = IB_SEND_SIGNALED;
+ }
+}
/* Pre-allocate extra Work Requests for handling backward receives
* and sends. This is a fixed value because the Work Queues are
@@ -473,6 +490,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+void rpcrdma_conn_func(struct rpcrdma_ep *ep);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
@@ -532,13 +550,6 @@ rpcrdma_data_dir(bool writing)
}
/*
- * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
- */
-void rpcrdma_connect_worker(struct work_struct *);
-void rpcrdma_conn_func(struct rpcrdma_ep *);
-void rpcrdma_reply_handler(struct work_struct *);
-
-/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/
@@ -555,12 +566,14 @@ bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
int rpcrdma_marshal_req(struct rpc_rqst *);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
+void rpcrdma_reply_handler(struct work_struct *work);
/* RPC/RDMA module init - xprtrdma/transport.c
*/
extern unsigned int xprt_rdma_max_inline_read;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void rpcrdma_connect_worker(struct work_struct *work);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
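
Note: rpcrdma_set_signaled() and rpcrdma_init_cqcount() replace the INIT_CQCOUNT/DECR_CQCOUNT macros: the counter counts down unsignaled sends, and once it crosses zero the next WR carries IB_SEND_SIGNALED so the provider can reclaim SQ entries; the count argument lets frwr_op_unmap_sync() pre-charge for a chain it posts itself. Illustrative usage mirroring rpcrdma_ep_post():

/* Sketch only: every send funnels through the signaling helper. */
static int post_send_wr(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
			struct ib_send_wr *send_wr)
{
	struct ib_send_wr *bad_wr;

	rpcrdma_set_signaled(ep, send_wr);	/* may set IB_SEND_SIGNALED */
	return ib_post_send(ia->ri_id->qp, send_wr, &bad_wr);
}
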
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e01c825bc683..af392d9b9cec 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1080,10 +1080,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
if (sk == NULL)
goto out;
for (;;) {
- skb = skb_recv_datagram(sk, 0, 1, &err);
+ skb = skb_recv_udp(sk, 0, 1, &err);
if (skb != NULL) {
xs_udp_data_read_skb(&transport->xprt, sk, skb);
- skb_free_datagram_locked(sk, skb);
+ consume_skb(skb);
continue;
}
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 3b95fe980fa2..017801f9dbaa 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -624,13 +624,10 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info)
{
- int err;
-
ASSERT_RTNL();
info->dev = dev;
- err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
- return err;
+ return raw_notifier_call_chain(&switchdev_notif_chain, val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 236b043a4156..0b982d048fb9 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -47,7 +47,7 @@
#include <linux/module.h>
/* configurable TIPC parameters */
-int tipc_net_id __read_mostly;
+unsigned int tipc_net_id __read_mostly;
int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */
static int __net_init tipc_init_net(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index a1845fb27d80..5cc5398be722 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -74,7 +74,7 @@ struct tipc_monitor;
#define MAX_BEARERS 3
#define TIPC_DEF_MON_THRESHOLD 32
-extern int tipc_net_id __read_mostly;
+extern unsigned int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
extern int sysctl_tipc_named_timeout __read_mostly;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 6b109a808d4c..02462d67d191 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -169,7 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
/* Send response, if necessary */
if (respond && (mtyp == DSC_REQ_MSG)) {
- rskb = tipc_buf_acquire(MAX_H_SIZE);
+ rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
if (!rskb)
return;
tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
@@ -278,7 +278,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
return -ENOMEM;
- req->buf = tipc_buf_acquire(MAX_H_SIZE);
+ req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
if (!req->buf) {
kfree(req);
return -ENOMEM;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index bda89bf9f4ff..4e8647aef01c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1395,7 +1395,7 @@ tnl:
msg_set_seqno(hdr, seqno++);
pktlen = msg_size(hdr);
msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
- tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE);
+ tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
if (!tnlskb) {
pr_warn("%sunable to send packet\n", link_co_err);
return;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 17201aa8423d..ab02d0742476 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -58,12 +58,12 @@ static unsigned int align(unsigned int i)
* NOTE: Headroom is reserved to allow prepending of a data link header.
* There may also be unrequested tailroom present at the buffer's end.
*/
-struct sk_buff *tipc_buf_acquire(u32 size)
+struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
struct sk_buff *skb;
unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
- skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
+ skb = alloc_skb_fclone(buf_size, gfp);
if (skb) {
skb_reserve(skb, BUF_HEADROOM);
skb_put(skb, size);
@@ -95,7 +95,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type,
struct tipc_msg *msg;
struct sk_buff *buf;
- buf = tipc_buf_acquire(hdr_sz + data_sz);
+ buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC);
if (unlikely(!buf))
return NULL;
@@ -261,14 +261,14 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
/* No fragmentation needed? */
if (likely(msz <= pktmax)) {
- skb = tipc_buf_acquire(msz);
+ skb = tipc_buf_acquire(msz, GFP_KERNEL);
if (unlikely(!skb))
return -ENOMEM;
skb_orphan(skb);
__skb_queue_tail(list, skb);
skb_copy_to_linear_data(skb, mhdr, mhsz);
pktpos = skb->data + mhsz;
- if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz)
+ if (copy_from_iter_full(pktpos, dsz, &m->msg_iter))
return dsz;
rc = -EFAULT;
goto error;
@@ -282,7 +282,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
msg_set_importance(&pkthdr, msg_importance(mhdr));
/* Prepare first fragment */
- skb = tipc_buf_acquire(pktmax);
+ skb = tipc_buf_acquire(pktmax, GFP_KERNEL);
if (!skb)
return -ENOMEM;
skb_orphan(skb);
@@ -299,7 +299,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
if (drem < pktrem)
pktrem = drem;
- if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) {
+ if (!copy_from_iter_full(pktpos, pktrem, &m->msg_iter)) {
rc = -EFAULT;
goto error;
}
@@ -313,7 +313,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
pktsz = drem + INT_H_SIZE;
else
pktsz = pktmax;
- skb = tipc_buf_acquire(pktsz);
+ skb = tipc_buf_acquire(pktsz, GFP_KERNEL);
if (!skb) {
rc = -ENOMEM;
goto error;
@@ -448,7 +448,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
if (msz > (max / 2))
return false;
- _skb = tipc_buf_acquire(max);
+ _skb = tipc_buf_acquire(max, GFP_ATOMIC);
if (!_skb)
return false;
@@ -496,7 +496,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
/* Never return SHORT header; expand by replacing buffer if necessary */
if (msg_short(hdr)) {
- *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen);
+ *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen, GFP_ATOMIC);
if (!*skb)
goto exit;
memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen);
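
Note: two independent improvements land in msg.c: tipc_buf_acquire() grows a gfp_t argument so process-context callers can allocate with GFP_KERNEL instead of blanket GFP_ATOMIC, and the copy_from_iter() != len checks become copy_from_iter_full(), which copies everything or consumes nothing, removing the partially-advanced-iterator case. The latter pattern in isolation:

#include <linux/uio.h>

/* Sketch only: with the _full variant, failure leaves the iterator
 * untouched, so the caller can bail out without rewinding.
 */
static int fill_from_iter(void *dst, size_t len, struct iov_iter *from)
{
	if (!copy_from_iter_full(dst, len, from))
		return -EFAULT;
	return 0;
}
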
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 50a739860d37..2c3dc38abf9c 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -95,7 +95,7 @@ struct plist;
#define TIPC_MEDIA_INFO_OFFSET 5
struct tipc_skb_cb {
- void *handle;
+ u32 bytes_read;
struct sk_buff *tail;
bool validated;
bool wakeup_pending;
@@ -820,7 +820,7 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
}
-struct sk_buff *tipc_buf_acquire(u32 size);
+struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
bool tipc_msg_validate(struct sk_buff *skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index c1cfd92de17a..23f8899e0f8c 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -69,7 +69,7 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
u32 dest)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size);
+ struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
struct tipc_msg *msg;
if (buf != NULL) {
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 3200059d14b2..26ca8dd64ded 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -135,15 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
/* Users of the legacy API (tipc-config) can't handle that we add operations,
* so we have a separate genl handling for the new API.
*/
-struct genl_family tipc_genl_family = {
- .id = GENL_ID_GENERATE,
- .name = TIPC_GENL_V2_NAME,
- .version = TIPC_GENL_V2_VERSION,
- .hdrsize = 0,
- .maxattr = TIPC_NLA_MAX,
- .netnsok = true,
-};
-
static const struct genl_ops tipc_genl_v2_ops[] = {
{
.cmd = TIPC_NL_BEARER_DISABLE,
@@ -258,23 +249,33 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
#endif
};
+struct genl_family tipc_genl_family __ro_after_init = {
+ .name = TIPC_GENL_V2_NAME,
+ .version = TIPC_GENL_V2_VERSION,
+ .hdrsize = 0,
+ .maxattr = TIPC_NLA_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = tipc_genl_v2_ops,
+ .n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
+};
+
int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
{
u32 maxattr = tipc_genl_family.maxattr;
- *attr = tipc_genl_family.attrbuf;
+ *attr = genl_family_attrbuf(&tipc_genl_family);
if (!*attr)
return -EOPNOTSUPP;
return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy);
}
-int tipc_netlink_start(void)
+int __init tipc_netlink_start(void)
{
int res;
- res = genl_register_family_with_ops(&tipc_genl_family,
- tipc_genl_v2_ops);
+ res = genl_register_family(&tipc_genl_family);
if (res) {
pr_err("Failed to register netlink interface\n");
return res;
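
Note: with GENL_ID_GENERATE removed from the kernel, a generic netlink family is now registered as a single static object: ops, n_ops, and the owning module are members, genl_register_family() assigns the ID, and __ro_after_init lets the structure be write-protected once boot finishes. The shape of such a registration (all example_* names are placeholders, not part of this patch):

#include <net/genetlink.h>

static int example_doit(struct sk_buff *skb, struct genl_info *info)
{
	return 0;	/* placeholder handler */
}

static const struct genl_ops example_ops[] = {
	{ .cmd = 1, .doit = example_doit, },
};

static struct genl_family example_family __ro_after_init = {
	.name		= "example",
	.version	= 1,
	.maxattr	= 0,
	.module		= THIS_MODULE,
	.ops		= example_ops,
	.n_ops		= ARRAY_SIZE(example_ops),
};

static int __init example_register(void)
{
	return genl_register_family(&example_family);	/* id auto-assigned */
}
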
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 1fd464764765..e1ae8a8a2b8e 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1215,15 +1215,6 @@ send:
return err;
}
-static struct genl_family tipc_genl_compat_family = {
- .id = GENL_ID_GENERATE,
- .name = TIPC_GENL_NAME,
- .version = TIPC_GENL_VERSION,
- .hdrsize = TIPC_GENL_HDRLEN,
- .maxattr = 0,
- .netnsok = true,
-};
-
static struct genl_ops tipc_genl_compat_ops[] = {
{
.cmd = TIPC_GENL_CMD,
@@ -1231,12 +1222,22 @@ static struct genl_ops tipc_genl_compat_ops[] = {
},
};
-int tipc_netlink_compat_start(void)
+static struct genl_family tipc_genl_compat_family __ro_after_init = {
+ .name = TIPC_GENL_NAME,
+ .version = TIPC_GENL_VERSION,
+ .hdrsize = TIPC_GENL_HDRLEN,
+ .maxattr = 0,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = tipc_genl_compat_ops,
+ .n_ops = ARRAY_SIZE(tipc_genl_compat_ops),
+};
+
+int __init tipc_netlink_compat_start(void)
{
int res;
- res = genl_register_family_with_ops(&tipc_genl_compat_family,
- tipc_genl_compat_ops);
+ res = genl_register_family(&tipc_genl_compat_family);
if (res) {
pr_err("Failed to register legacy compat interface\n");
return res;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9d2f4c2b08ab..27753325e06e 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -263,6 +263,11 @@ static void tipc_node_write_lock(struct tipc_node *n)
write_lock_bh(&n->lock);
}
+static void tipc_node_write_unlock_fast(struct tipc_node *n)
+{
+ write_unlock_bh(&n->lock);
+}
+
static void tipc_node_write_unlock(struct tipc_node *n)
{
struct net *net = n->net;
@@ -417,7 +422,7 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr)
}
tipc_node_write_lock(n);
list_add_tail(subscr, &n->publ_list);
- tipc_node_write_unlock(n);
+ tipc_node_write_unlock_fast(n);
tipc_node_put(n);
}
@@ -435,7 +440,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr)
}
tipc_node_write_lock(n);
list_del_init(subscr);
- tipc_node_write_unlock(n);
+ tipc_node_write_unlock_fast(n);
tipc_node_put(n);
}
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 215849ce453d..3cd6402e812c 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -86,12 +86,12 @@ struct outqueue_entry {
static void tipc_recv_work(struct work_struct *work);
static void tipc_send_work(struct work_struct *work);
static void tipc_clean_outqueues(struct tipc_conn *con);
-static void tipc_sock_release(struct tipc_conn *con);
static void tipc_conn_kref_release(struct kref *kref)
{
struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct sockaddr_tipc *saddr = con->server->saddr;
+ struct tipc_server *s = con->server;
+ struct sockaddr_tipc *saddr = s->saddr;
struct socket *sock = con->sock;
struct sock *sk;
@@ -103,9 +103,13 @@ static void tipc_conn_kref_release(struct kref *kref)
}
saddr->scope = -TIPC_NODE_SCOPE;
kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
- tipc_sock_release(con);
sock_release(sock);
con->sock = NULL;
+
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
}
tipc_clean_outqueues(con);
@@ -128,8 +132,10 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
spin_lock_bh(&s->idr_lock);
con = idr_find(&s->conn_idr, conid);
- if (con)
+ if (con && test_bit(CF_CONNECTED, &con->flags))
conn_get(con);
+ else
+ con = NULL;
spin_unlock_bh(&s->idr_lock);
return con;
}
@@ -186,26 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con)
write_unlock_bh(&sk->sk_callback_lock);
}
-static void tipc_sock_release(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
-
- if (con->conid)
- s->tipc_conn_release(con->conid, con->usr_data);
-
- tipc_unregister_callbacks(con);
-}
-
static void tipc_close_conn(struct tipc_conn *con)
{
struct tipc_server *s = con->server;
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
+ tipc_unregister_callbacks(con);
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
+ if (con->conid)
+ s->tipc_conn_release(con->conid, con->usr_data);
/* We shouldn't flush pending works as we may be in the
* thread. In fact the races with pending rx/tx work structs
@@ -458,6 +453,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
if (!con)
return -EINVAL;
+ if (!test_bit(CF_CONNECTED, &con->flags)) {
+ conn_put(con);
+ return 0;
+ }
+
e = tipc_alloc_entry(data, len);
if (!e) {
conn_put(con);
@@ -471,12 +471,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
list_add_tail(&e->list, &con->outqueue);
spin_unlock_bh(&con->outqueue_lock);
- if (test_bit(CF_CONNECTED, &con->flags)) {
- if (!queue_work(s->send_wq, &con->swork))
- conn_put(con);
- } else {
+ if (!queue_work(s->send_wq, &con->swork))
conn_put(con);
- }
return 0;
}
@@ -500,7 +496,7 @@ static void tipc_send_to_sock(struct tipc_conn *con)
int ret;
spin_lock_bh(&con->outqueue_lock);
- while (1) {
+ while (test_bit(CF_CONNECTED, &con->flags)) {
e = list_entry(con->outqueue.next, struct outqueue_entry,
list);
if ((struct list_head *) e == &con->outqueue)
@@ -623,14 +619,12 @@ int tipc_server_start(struct tipc_server *s)
void tipc_server_stop(struct tipc_server *s)
{
struct tipc_conn *con;
- int total = 0;
int id;
spin_lock_bh(&s->idr_lock);
- for (id = 0; total < s->idr_in_use; id++) {
+ for (id = 0; s->idr_in_use; id++) {
con = idr_find(&s->conn_idr, id);
if (con) {
- total++;
spin_unlock_bh(&s->idr_lock);
tipc_close_conn(con);
spin_lock_bh(&s->idr_lock);
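
Note: the server.c changes close a use-after-free window around connection teardown: tipc_conn_lookup() now refuses connections that have lost CF_CONNECTED, and the conid is removed from the IDR only in the final kref release, so an id cannot be recycled while any reference is outstanding. The ownership rule condensed (a paraphrase of the patch, field names taken from the diff above):

/* Sketch only: the id stays reserved until the last reference drops. */
static void conn_kref_release_sketch(struct kref *kref)
{
	struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
	struct tipc_server *s = con->server;

	spin_lock_bh(&s->idr_lock);
	idr_remove(&s->conn_idr, con->conid);	/* id reusable only now */
	s->idr_in_use--;
	spin_unlock_bh(&s->idr_lock);
	kfree(con);
}
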
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 41f013888f07..800caaa699a1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -44,44 +44,43 @@
#include "bcast.h"
#include "netlink.h"
-#define SS_LISTENING -1 /* socket is listening */
-#define SS_READY -2 /* socket is connectionless */
-
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
-#define TIPC_CONN_OK 0
-#define TIPC_CONN_PROBING 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
+enum {
+ TIPC_LISTEN = TCP_LISTEN,
+ TIPC_ESTABLISHED = TCP_ESTABLISHED,
+ TIPC_OPEN = TCP_CLOSE,
+ TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
+ TIPC_CONNECTING = TCP_SYN_SENT,
+};
+
/**
* struct tipc_sock - TIPC socket structure
* @sk: socket - interacts with 'port' and with user via the socket API
- * @connected: non-zero if port is currently connected to a peer port
* @conn_type: TIPC type used when connection was established
* @conn_instance: TIPC instance used when connection was established
* @published: non-zero if port has one or more associated names
* @max_pkt: maximum packet size "hint" used when building messages sent by port
* @portid: unique port identity in TIPC socket hash table
* @phdr: preformatted message header used when sending messages
- * @port_list: adjacent ports in TIPC's global list of ports
* @publications: list of publications for port
* @pub_count: total # of publications port has made during its lifetime
* @probing_state:
- * @probing_intv:
* @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @link_cong: non-zero if owner must sleep because of link congestion
* @sent_unacked: # messages sent by socket, and not yet acked by peer
* @rcv_unacked: # messages read by user, but not yet acked back to peer
- * @remote: 'connected' peer for dgram/rdm
+ * @peer: 'connected' peer for dgram/rdm
* @node: hash table node
* @rcu: rcu struct for tipc_sock
*/
struct tipc_sock {
struct sock sk;
- int connected;
u32 conn_type;
u32 conn_instance;
int published;
@@ -91,17 +90,16 @@ struct tipc_sock {
struct list_head sock_list;
struct list_head publications;
u32 pub_count;
- u32 probing_state;
- unsigned long probing_intv;
uint conn_timeout;
atomic_t dupl_rcvcnt;
+ bool probe_unacked;
bool link_cong;
u16 snt_unacked;
u16 snd_win;
u16 peer_caps;
u16 rcv_unacked;
u16 rcv_win;
- struct sockaddr_tipc remote;
+ struct sockaddr_tipc peer;
struct rhash_head node;
struct rcu_head rcu;
};
@@ -248,6 +246,21 @@ static void tsk_rej_rx_queue(struct sock *sk)
tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
}
+static bool tipc_sk_connected(struct sock *sk)
+{
+ return sk->sk_state == TIPC_ESTABLISHED;
+}
+
+/* tipc_sk_type_connectionless - check if the socket is a datagram socket
+ * @sk: socket
+ *
+ * Returns true if connectionless, false otherwise
+ */
+static bool tipc_sk_type_connectionless(struct sock *sk)
+{
+ return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
+}
+
/* tsk_peer_msg - verify if message was sent by connected port's peer
*
* Handles cases where the node's network address has changed from
@@ -255,12 +268,13 @@ static void tsk_rej_rx_queue(struct sock *sk)
*/
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
- struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id);
+ struct sock *sk = &tsk->sk;
+ struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
u32 peer_port = tsk_peer_port(tsk);
u32 orig_node;
u32 peer_node;
- if (unlikely(!tsk->connected))
+ if (unlikely(!tipc_sk_connected(sk)))
return false;
if (unlikely(msg_origport(msg) != peer_port))
@@ -281,6 +295,45 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
return false;
}
+/* tipc_set_sk_state - set the sk_state of the socket
+ * @sk: socket
+ * @state: the new socket state to set
+ * Caller must hold socket lock
+ *
+ * Returns 0 on success, errno otherwise
+ */
+static int tipc_set_sk_state(struct sock *sk, int state)
+{
+ int oldsk_state = sk->sk_state;
+ int res = -EINVAL;
+
+ switch (state) {
+ case TIPC_OPEN:
+ res = 0;
+ break;
+ case TIPC_LISTEN:
+ case TIPC_CONNECTING:
+ if (oldsk_state == TIPC_OPEN)
+ res = 0;
+ break;
+ case TIPC_ESTABLISHED:
+ if (oldsk_state == TIPC_CONNECTING ||
+ oldsk_state == TIPC_OPEN)
+ res = 0;
+ break;
+ case TIPC_DISCONNECTING:
+ if (oldsk_state == TIPC_CONNECTING ||
+ oldsk_state == TIPC_ESTABLISHED)
+ res = 0;
+ break;
+ }
+
+ if (!res)
+ sk->sk_state = state;
+
+ return res;
+}
+
/**
* tipc_sk_create - create a TIPC socket
* @net: network namespace (must be default network)
@@ -298,7 +351,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
{
struct tipc_net *tn;
const struct proto_ops *ops;
- socket_state state;
struct sock *sk;
struct tipc_sock *tsk;
struct tipc_msg *msg;
@@ -310,16 +362,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
switch (sock->type) {
case SOCK_STREAM:
ops = &stream_ops;
- state = SS_UNCONNECTED;
break;
case SOCK_SEQPACKET:
ops = &packet_ops;
- state = SS_UNCONNECTED;
break;
case SOCK_DGRAM:
case SOCK_RDM:
ops = &msg_ops;
- state = SS_READY;
break;
default:
return -EPROTOTYPE;
@@ -340,14 +389,15 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
/* Finish initializing socket data structures */
sock->ops = ops;
- sock->state = state;
sock_init_data(sock, sk);
+ tipc_set_sk_state(sk, TIPC_OPEN);
if (tipc_sk_insert(tsk)) {
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
msg_set_origport(msg, tsk->portid);
setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
+ sk->sk_shutdown = 0;
sk->sk_backlog_rcv = tipc_backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
@@ -360,11 +410,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
tsk->rcv_win = tsk->snd_win;
- if (sock->state == SS_READY) {
+ if (tipc_sk_type_connectionless(sk)) {
tsk_set_unreturnable(tsk, true);
if (sock->type == SOCK_DGRAM)
tsk_set_unreliable(tsk, true);
}
+
return 0;
}
@@ -375,6 +426,46 @@ static void tipc_sk_callback(struct rcu_head *head)
sock_put(&tsk->sk);
}
+/* Caller should hold socket lock for the socket. */
+static void __tipc_shutdown(struct socket *sock, int error)
+{
+ struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct net *net = sock_net(sk);
+ u32 dnode = tsk_peer_node(tsk);
+ struct sk_buff *skb;
+
+ /* Reject all unreceived messages, except on an active connection
+ * (which disconnects locally & sends a 'FIN+' to peer).
+ */
+ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ if (TIPC_SKB_CB(skb)->bytes_read) {
+ kfree_skb(skb);
+ continue;
+ }
+ if (!tipc_sk_type_connectionless(sk) &&
+ sk->sk_state != TIPC_DISCONNECTING) {
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, dnode, tsk->portid);
+ }
+ tipc_sk_respond(sk, skb, error);
+ }
+
+ if (tipc_sk_type_connectionless(sk))
+ return;
+
+ if (sk->sk_state != TIPC_DISCONNECTING) {
+ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
+ TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
+ tsk_own_node(tsk), tsk_peer_port(tsk),
+ tsk->portid, error);
+ if (skb)
+ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+ tipc_node_remove_conn(net, dnode, tsk->portid);
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ }
+}
+
/**
* tipc_release - destroy a TIPC socket
* @sock: socket to destroy
@@ -394,10 +485,7 @@ static void tipc_sk_callback(struct rcu_head *head)
static int tipc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- struct net *net;
struct tipc_sock *tsk;
- struct sk_buff *skb;
- u32 dnode;
/*
* Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -406,47 +494,16 @@ static int tipc_release(struct socket *sock)
if (sk == NULL)
return 0;
- net = sock_net(sk);
tsk = tipc_sk(sk);
lock_sock(sk);
- /*
- * Reject all unreceived messages, except on an active connection
- * (which disconnects locally & sends a 'FIN+' to peer)
- */
- dnode = tsk_peer_node(tsk);
- while (sock->state != SS_DISCONNECTING) {
- skb = __skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL)
- break;
- if (TIPC_SKB_CB(skb)->handle != NULL)
- kfree_skb(skb);
- else {
- if ((sock->state == SS_CONNECTING) ||
- (sock->state == SS_CONNECTED)) {
- sock->state = SS_DISCONNECTING;
- tsk->connected = 0;
- tipc_node_remove_conn(net, dnode, tsk->portid);
- }
- tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
- }
- }
-
+ __tipc_shutdown(sock, TIPC_ERR_NO_PORT);
+ sk->sk_shutdown = SHUTDOWN_MASK;
tipc_sk_withdraw(tsk, 0, NULL);
sk_stop_timer(sk, &sk->sk_timer);
tipc_sk_remove(tsk);
- if (tsk->connected) {
- skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
- TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
- tsk_own_node(tsk), tsk_peer_port(tsk),
- tsk->portid, TIPC_ERR_NO_PORT);
- if (skb)
- tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
- tipc_node_remove_conn(net, dnode, tsk->portid);
- }
/* Reject any messages that accumulated in backlog queue */
- sock->state = SS_DISCONNECTING;
release_sock(sk);
call_rcu(&tsk->rcu, tipc_sk_callback);
@@ -532,13 +589,14 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
int *uaddr_len, int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
- struct tipc_sock *tsk = tipc_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
memset(addr, 0, sizeof(*addr));
if (peer) {
- if ((sock->state != SS_CONNECTED) &&
- ((peer != 2) || (sock->state != SS_DISCONNECTING)))
+ if ((!tipc_sk_connected(sk)) &&
+ ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
return -ENOTCONN;
addr->addr.id.ref = tsk_peer_port(tsk);
addr->addr.id.node = tsk_peer_node(tsk);
@@ -570,28 +628,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
* exits. TCP and other protocols seem to rely on higher level poll routines
* to handle any preventable race conditions, so TIPC will do the same ...
*
- * TIPC sets the returned events as follows:
- *
- * socket state flags set
- * ------------ ---------
- * unconnected no read flags
- * POLLOUT if port is not congested
- *
- * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
- * no write flags
- *
- * connected POLLIN/POLLRDNORM if data in rx queue
- * POLLOUT if port is not congested
- *
- * disconnecting POLLIN/POLLRDNORM/POLLHUP
- * no write flags
- *
- * listening POLLIN if SYN in rx queue
- * no write flags
- *
- * ready POLLIN/POLLRDNORM if data in rx queue
- * [connectionless] POLLOUT (since port cannot be congested)
- *
* IMPORTANT: The fact that a read or write operation is indicated does NOT
* imply that the operation will succeed, merely that it should be performed
* and will not block.
@@ -605,22 +641,29 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sk_sleep(sk), wait);
- switch ((int)sock->state) {
- case SS_UNCONNECTED:
- if (!tsk->link_cong)
- mask |= POLLOUT;
- break;
- case SS_READY:
- case SS_CONNECTED:
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
+ mask |= POLLHUP;
+
+ switch (sk->sk_state) {
+ case TIPC_ESTABLISHED:
if (!tsk->link_cong && !tsk_conn_cong(tsk))
mask |= POLLOUT;
/* fall thru' */
- case SS_CONNECTING:
- case SS_LISTENING:
+ case TIPC_LISTEN:
+ case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= (POLLIN | POLLRDNORM);
break;
- case SS_DISCONNECTING:
+ case TIPC_OPEN:
+ if (!tsk->link_cong)
+ mask |= POLLOUT;
+ if (tipc_sk_type_connectionless(sk) &&
+ (!skb_queue_empty(&sk->sk_receive_queue)))
+ mask |= (POLLIN | POLLRDNORM);
+ break;
+ case TIPC_DISCONNECTING:
mask = (POLLIN | POLLRDNORM | POLLHUP);
break;
}
@@ -651,6 +694,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
uint mtu;
int rc;
+ if (!timeo && tsk->link_cong)
+ return -ELINKCONG;
+
msg_set_type(mhdr, TIPC_MCAST_MSG);
msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE);
msg_set_destport(mhdr, 0);
@@ -763,7 +809,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
if (!tsk_peer_msg(tsk, hdr))
goto exit;
- tsk->probing_state = TIPC_CONN_OK;
+ tsk->probe_unacked = false;
if (mtyp == CONN_PROBE) {
msg_set_type(hdr, CONN_PROBE_REPLY);
@@ -786,25 +832,25 @@ exit:
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- DEFINE_WAIT(wait);
int done;
do {
int err = sock_error(sk);
if (err)
return err;
- if (sock->state == SS_DISCONNECTING)
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
return -EPIPE;
if (!*timeo_p)
return -EAGAIN;
if (signal_pending(current))
return sock_intr_errno(*timeo_p);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- done = sk_wait_event(sk, timeo_p, !tsk->link_cong);
- finish_wait(sk_sleep(sk), &wait);
+ add_wait_queue(sk_sleep(sk), &wait);
+ done = sk_wait_event(sk, timeo_p, !tsk->link_cong, &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
@@ -844,6 +890,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
struct tipc_msg *mhdr = &tsk->phdr;
u32 dnode, dport;
struct sk_buff_head pktchain;
+ bool is_connectionless = tipc_sk_type_connectionless(sk);
struct sk_buff *skb;
struct tipc_name_seq *seq;
struct iov_iter save;
@@ -854,18 +901,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
if (dsz > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
if (unlikely(!dest)) {
- if (tsk->connected && sock->state == SS_READY)
- dest = &tsk->remote;
+ if (is_connectionless && tsk->peer.family == AF_TIPC)
+ dest = &tsk->peer;
else
return -EDESTADDRREQ;
} else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
dest->family != AF_TIPC) {
return -EINVAL;
}
- if (unlikely(sock->state != SS_READY)) {
- if (sock->state == SS_LISTENING)
+ if (!is_connectionless) {
+ if (sk->sk_state == TIPC_LISTEN)
return -EPIPE;
- if (sock->state != SS_UNCONNECTED)
+ if (sk->sk_state != TIPC_OPEN)
return -EISCONN;
if (tsk->published)
return -EOPNOTSUPP;
@@ -917,8 +964,8 @@ new_mtu:
TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
if (likely(!rc)) {
- if (sock->state != SS_READY)
- sock->state = SS_CONNECTING;
+ if (!is_connectionless)
+ tipc_set_sk_state(sk, TIPC_CONNECTING);
return dsz;
}
if (rc == -ELINKCONG) {
@@ -940,30 +987,30 @@ new_mtu:
static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- DEFINE_WAIT(wait);
int done;
do {
int err = sock_error(sk);
if (err)
return err;
- if (sock->state == SS_DISCONNECTING)
+ if (sk->sk_state == TIPC_DISCONNECTING)
return -EPIPE;
- else if (sock->state != SS_CONNECTED)
+ else if (!tipc_sk_connected(sk))
return -ENOTCONN;
if (!*timeo_p)
return -EAGAIN;
if (signal_pending(current))
return sock_intr_errno(*timeo_p);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
done = sk_wait_event(sk, timeo_p,
(!tsk->link_cong &&
!tsk_conn_cong(tsk)) ||
- !tsk->connected);
- finish_wait(sk_sleep(sk), &wait);
+ !tipc_sk_connected(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
@@ -1018,14 +1065,17 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
if (dsz > (uint)INT_MAX)
return -EMSGSIZE;
- if (unlikely(sock->state != SS_CONNECTED)) {
- if (sock->state == SS_DISCONNECTING)
+ if (unlikely(!tipc_sk_connected(sk))) {
+ if (sk->sk_state == TIPC_DISCONNECTING)
return -EPIPE;
else
return -ENOTCONN;
}
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+ if (!timeo && tsk->link_cong)
+ return -ELINKCONG;
+
dnode = tsk_peer_node(tsk);
skb_queue_head_init(&pktchain);
@@ -1099,10 +1149,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
msg_set_lookup_scope(msg, 0);
msg_set_hdr_sz(msg, SHORT_H_SIZE);
- tsk->probing_intv = CONN_PROBING_INTERVAL;
- tsk->probing_state = TIPC_CONN_OK;
- tsk->connected = 1;
- sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+ tipc_set_sk_state(sk, TIPC_ESTABLISHED);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
@@ -1210,13 +1258,14 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
- struct net *net = sock_net(&tsk->sk);
+ struct sock *sk = &tsk->sk;
+ struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
struct tipc_msg *msg;
u32 peer_port = tsk_peer_port(tsk);
u32 dnode = tsk_peer_node(tsk);
- if (!tsk->connected)
+ if (!tipc_sk_connected(sk))
return;
skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
dnode, tsk_own_node(tsk), peer_port,
@@ -1245,7 +1294,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
- if (sock->state == SS_DISCONNECTING) {
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
err = -ENOTCONN;
break;
}
@@ -1286,6 +1335,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
struct tipc_sock *tsk = tipc_sk(sk);
struct sk_buff *buf;
struct tipc_msg *msg;
+ bool is_connectionless = tipc_sk_type_connectionless(sk);
long timeo;
unsigned int sz;
u32 err;
@@ -1297,7 +1347,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
lock_sock(sk);
- if (unlikely(sock->state == SS_UNCONNECTED)) {
+ if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) {
res = -ENOTCONN;
goto exit;
}
@@ -1342,8 +1392,8 @@ restart:
goto exit;
res = sz;
} else {
- if ((sock->state == SS_READY) ||
- ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
+ if (is_connectionless || err == TIPC_CONN_SHUTDOWN ||
+ m->msg_control)
res = 0;
else
res = -ECONNRESET;
@@ -1352,7 +1402,7 @@ restart:
if (unlikely(flags & MSG_PEEK))
goto exit;
- if (likely(sock->state != SS_READY)) {
+ if (likely(!is_connectionless)) {
tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
tipc_sk_send_ack(tsk);
@@ -1383,7 +1433,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
struct tipc_msg *msg;
long timeo;
unsigned int sz;
- int sz_to_copy, target, needed;
+ int target;
int sz_copied = 0;
u32 err;
int res = 0, hlen;
@@ -1394,7 +1444,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
lock_sock(sk);
- if (unlikely(sock->state == SS_UNCONNECTED)) {
+ if (unlikely(sk->sk_state == TIPC_OPEN)) {
res = -ENOTCONN;
goto exit;
}
@@ -1431,11 +1481,13 @@ restart:
/* Capture message data (if valid) & compute return value (always) */
if (!err) {
- u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
+ u32 offset = TIPC_SKB_CB(buf)->bytes_read;
+ u32 needed;
+ int sz_to_copy;
sz -= offset;
needed = (buf_len - sz_copied);
- sz_to_copy = (sz <= needed) ? sz : needed;
+ sz_to_copy = min(sz, needed);
res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
if (res)
@@ -1445,8 +1497,8 @@ restart:
if (sz_to_copy < sz) {
if (!(flags & MSG_PEEK))
- TIPC_SKB_CB(buf)->handle =
- (void *)(unsigned long)(offset + sz_to_copy);
+ TIPC_SKB_CB(buf)->bytes_read =
+ offset + sz_to_copy;
goto exit;
}
} else {
@@ -1528,49 +1580,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
- struct socket *sock = sk->sk_socket;
struct tipc_msg *hdr = buf_msg(skb);
if (unlikely(msg_mcast(hdr)))
return false;
- switch ((int)sock->state) {
- case SS_CONNECTED:
-
- /* Accept only connection-based messages sent by peer */
- if (unlikely(!tsk_peer_msg(tsk, hdr)))
- return false;
-
- if (unlikely(msg_errcode(hdr))) {
- sock->state = SS_DISCONNECTING;
- tsk->connected = 0;
- /* Let timer expire on it's own */
- tipc_node_remove_conn(net, tsk_peer_node(tsk),
- tsk->portid);
- }
- return true;
-
- case SS_CONNECTING:
-
+ switch (sk->sk_state) {
+ case TIPC_CONNECTING:
/* Accept only ACK or NACK message */
if (unlikely(!msg_connected(hdr)))
return false;
if (unlikely(msg_errcode(hdr))) {
- sock->state = SS_DISCONNECTING;
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
sk->sk_err = ECONNREFUSED;
return true;
}
if (unlikely(!msg_isdata(hdr))) {
- sock->state = SS_DISCONNECTING;
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
sk->sk_err = EINVAL;
return true;
}
tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
msg_set_importance(&tsk->phdr, msg_importance(hdr));
- sock->state = SS_CONNECTED;
/* If 'ACK+' message, add to socket receive queue */
if (msg_data_sz(hdr))
@@ -1584,18 +1618,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
msg_set_dest_droppable(hdr, 1);
return false;
- case SS_LISTENING:
- case SS_UNCONNECTED:
-
+ case TIPC_OPEN:
+ case TIPC_DISCONNECTING:
+ break;
+ case TIPC_LISTEN:
/* Accept only SYN message */
if (!msg_connected(hdr) && !(msg_errcode(hdr)))
return true;
break;
- case SS_DISCONNECTING:
- break;
+ case TIPC_ESTABLISHED:
+ /* Accept only connection-based messages sent by peer */
+ if (unlikely(!tsk_peer_msg(tsk, hdr)))
+ return false;
+
+ if (unlikely(msg_errcode(hdr))) {
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ /* Let timer expire on its own */
+ tipc_node_remove_conn(net, tsk_peer_node(tsk),
+ tsk->portid);
+ sk->sk_state_change(sk);
+ }
+ return true;
default:
- pr_err("Unknown socket state %u\n", sock->state);
+ pr_err("Unknown sk_state %u\n", sk->sk_state);
}
+
return false;
}
@@ -1646,7 +1693,6 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
struct sk_buff_head *xmitq)
{
- struct socket *sock = sk->sk_socket;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = buf_msg(skb);
unsigned int limit = rcvbuf_limit(sk, skb);
@@ -1672,7 +1718,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
}
/* Reject if wrong message type for current socket state */
- if (unlikely(sock->state == SS_READY)) {
+ if (tipc_sk_type_connectionless(sk)) {
if (msg_connected(hdr)) {
err = TIPC_ERR_NO_PORT;
goto reject;
@@ -1689,7 +1735,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
}
/* Enqueue message */
- TIPC_SKB_CB(skb)->handle = NULL;
+ TIPC_SKB_CB(skb)->bytes_read = 0;
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
@@ -1839,8 +1885,8 @@ xmit:
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk;
- DEFINE_WAIT(wait);
int done;
do {
@@ -1852,9 +1898,10 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
if (signal_pending(current))
return sock_intr_errno(*timeo_p);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING);
- finish_wait(sk_sleep(sk), &wait);
+ add_wait_queue(sk_sleep(sk), &wait);
+ done = sk_wait_event(sk, timeo_p,
+ sk->sk_state != TIPC_CONNECTING, &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
@@ -1876,21 +1923,19 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
struct msghdr m = {NULL,};
long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
- socket_state previous;
+ int previous;
int res = 0;
lock_sock(sk);
/* DGRAM/RDM connect(), just save the destaddr */
- if (sock->state == SS_READY) {
+ if (tipc_sk_type_connectionless(sk)) {
if (dst->family == AF_UNSPEC) {
- memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc));
- tsk->connected = 0;
+ memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
} else if (destlen != sizeof(struct sockaddr_tipc)) {
res = -EINVAL;
} else {
- memcpy(&tsk->remote, dest, destlen);
- tsk->connected = 1;
+ memcpy(&tsk->peer, dest, destlen);
}
goto exit;
}
@@ -1906,9 +1951,10 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
goto exit;
}
- previous = sock->state;
- switch (sock->state) {
- case SS_UNCONNECTED:
+ previous = sk->sk_state;
+
+ switch (sk->sk_state) {
+ case TIPC_OPEN:
/* Send a 'SYN-' to destination */
m.msg_name = dest;
m.msg_namelen = destlen;
@@ -1923,27 +1969,29 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
if ((res < 0) && (res != -EWOULDBLOCK))
goto exit;
- /* Just entered SS_CONNECTING state; the only
+ /* Just entered TIPC_CONNECTING state; the only
* difference is that return value in non-blocking
* case is EINPROGRESS, rather than EALREADY.
*/
res = -EINPROGRESS;
- case SS_CONNECTING:
- if (previous == SS_CONNECTING)
- res = -EALREADY;
- if (!timeout)
+ /* fall thru' */
+ case TIPC_CONNECTING:
+ if (!timeout) {
+ if (previous == TIPC_CONNECTING)
+ res = -EALREADY;
goto exit;
+ }
timeout = msecs_to_jiffies(timeout);
/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
res = tipc_wait_for_connect(sock, &timeout);
break;
- case SS_CONNECTED:
+ case TIPC_ESTABLISHED:
res = -EISCONN;
break;
default:
res = -EINVAL;
- break;
}
+
exit:
release_sock(sk);
return res;
@@ -1962,15 +2010,9 @@ static int tipc_listen(struct socket *sock, int len)
int res;
lock_sock(sk);
-
- if (sock->state != SS_UNCONNECTED)
- res = -EINVAL;
- else {
- sock->state = SS_LISTENING;
- res = 0;
- }
-
+ res = tipc_set_sk_state(sk, TIPC_LISTEN);
release_sock(sk);
+
return res;
}
@@ -1996,9 +2038,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
break;
- err = -EINVAL;
- if (sock->state != SS_LISTENING)
- break;
err = -EAGAIN;
if (!timeo)
break;
@@ -2029,7 +2068,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
lock_sock(sk);
- if (sock->state != SS_LISTENING) {
+ if (sk->sk_state != TIPC_LISTEN) {
res = -EINVAL;
goto exit;
}
@@ -2040,7 +2079,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
buf = skb_peek(&sk->sk_receive_queue);
- res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
+ res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
if (res)
goto exit;
security_sk_clone(sock->sk, new_sock->sk);
@@ -2060,7 +2099,6 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
/* Connect new socket to it's peer */
tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
- new_sock->state = SS_CONNECTED;
tsk_set_importance(new_tsock, msg_importance(msg));
if (msg_named(msg)) {
@@ -2100,13 +2138,6 @@ exit:
static int tipc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
- struct tipc_sock *tsk = tipc_sk(sk);
- struct sk_buff *skb;
- u32 dnode = tsk_peer_node(tsk);
- u32 dport = tsk_peer_port(tsk);
- u32 onode = tipc_own_addr(net);
- u32 oport = tsk->portid;
int res;
if (how != SHUT_RDWR)
@@ -2114,45 +2145,17 @@ static int tipc_shutdown(struct socket *sock, int how)
lock_sock(sk);
- switch (sock->state) {
- case SS_CONNECTING:
- case SS_CONNECTED:
-
-restart:
- dnode = tsk_peer_node(tsk);
-
- /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
- skb = __skb_dequeue(&sk->sk_receive_queue);
- if (skb) {
- if (TIPC_SKB_CB(skb)->handle != NULL) {
- kfree_skb(skb);
- goto restart;
- }
- tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN);
- } else {
- skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
- TIPC_CONN_MSG, SHORT_H_SIZE,
- 0, dnode, onode, dport, oport,
- TIPC_CONN_SHUTDOWN);
- if (skb)
- tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
- }
- tsk->connected = 0;
- sock->state = SS_DISCONNECTING;
- tipc_node_remove_conn(net, dnode, tsk->portid);
- /* fall through */
-
- case SS_DISCONNECTING:
+ __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
+ sk->sk_shutdown = SEND_SHUTDOWN;
+ if (sk->sk_state == TIPC_DISCONNECTING) {
/* Discard any unreceived messages */
__skb_queue_purge(&sk->sk_receive_queue);
/* Wake up anyone sleeping in poll */
sk->sk_state_change(sk);
res = 0;
- break;
-
- default:
+ } else {
res = -ENOTCONN;
}
@@ -2169,17 +2172,16 @@ static void tipc_sk_timeout(unsigned long data)
u32 own_node = tsk_own_node(tsk);
bh_lock_sock(sk);
- if (!tsk->connected) {
+ if (!tipc_sk_connected(sk)) {
bh_unlock_sock(sk);
goto exit;
}
peer_port = tsk_peer_port(tsk);
peer_node = tsk_peer_node(tsk);
- if (tsk->probing_state == TIPC_CONN_PROBING) {
+ if (tsk->probe_unacked) {
if (!sock_owned_by_user(sk)) {
- sk->sk_socket->state = SS_DISCONNECTING;
- tsk->connected = 0;
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
tsk_peer_port(tsk));
sk->sk_state_change(sk);
@@ -2188,13 +2190,15 @@ static void tipc_sk_timeout(unsigned long data)
sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
}
- } else {
- skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
- INT_H_SIZE, 0, peer_node, own_node,
- peer_port, tsk->portid, TIPC_OK);
- tsk->probing_state = TIPC_CONN_PROBING;
- sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
+ bh_unlock_sock(sk);
+ goto exit;
}
+
+ skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
+ INT_H_SIZE, 0, peer_node, own_node,
+ peer_port, tsk->portid, TIPC_OK);
+ tsk->probe_unacked = true;
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
bh_unlock_sock(sk);
if (skb)
tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
@@ -2205,11 +2209,12 @@ exit:
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq)
{
- struct net *net = sock_net(&tsk->sk);
+ struct sock *sk = &tsk->sk;
+ struct net *net = sock_net(sk);
struct publication *publ;
u32 key;
- if (tsk->connected)
+ if (tipc_sk_connected(sk))
return -EINVAL;
key = tsk->portid + tsk->pub_count + 1;
if (key == tsk->portid)
@@ -2667,6 +2672,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
struct nlattr *attrs;
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct sock *sk = &tsk->sk;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
@@ -2681,7 +2687,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
goto attr_msg_cancel;
- if (tsk->connected) {
+ if (tipc_sk_connected(sk)) {
err = __tipc_nl_add_sk_con(skb, tsk);
if (err)
goto attr_msg_cancel;
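
A pattern worth calling out: several of the socket.c hunks replace open-coded prepare_to_wait()/finish_wait() loops with DEFINE_WAIT_FUNC(wait, woken_wake_function) plus explicit add_wait_queue()/remove_wait_queue() and the extra &wait argument to sk_wait_event(). The woken-wake entry latches wakeups that arrive before the task actually sleeps, so none are lost between the condition check and the schedule. A sketch of the resulting loop, assuming a hypothetical tx_ready() condition:

    #include <net/sock.h>

    static bool tx_ready(const struct sock *sk);   /* hypothetical condition */

    static int wait_until_ready(struct sock *sk, long *timeo_p)
    {
            DEFINE_WAIT_FUNC(wait, woken_wake_function);
            int done;

            do {
                    int err = sock_error(sk);

                    if (err)
                            return err;
                    if (!*timeo_p)
                            return -EAGAIN;
                    if (signal_pending(current))
                            return sock_intr_errno(*timeo_p);

                    add_wait_queue(sk_sleep(sk), &wait);
                    /* sk_wait_event() drops the socket lock, sleeps until
                     * woken or *timeo_p expires, re-takes the lock, and
                     * re-evaluates the condition.
                     */
                    done = sk_wait_event(sk, timeo_p, tx_ready(sk), &wait);
                    remove_wait_queue(sk_sleep(sk), &wait);
            } while (!done);

            return 0;
    }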
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 0dd02244e21d..9d94e65d0894 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -54,6 +54,8 @@ struct tipc_subscriber {
static void tipc_subscrp_delete(struct tipc_subscription *sub);
static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
+static void tipc_subscrp_put(struct tipc_subscription *subscription);
+static void tipc_subscrp_get(struct tipc_subscription *subscription);
/**
* htohl - convert value to endianness used by destination
@@ -123,6 +125,7 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
{
struct tipc_name_seq seq;
+ tipc_subscrp_get(sub);
tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
@@ -132,30 +135,23 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
node);
+ tipc_subscrp_put(sub);
}
static void tipc_subscrp_timeout(unsigned long data)
{
struct tipc_subscription *sub = (struct tipc_subscription *)data;
- struct tipc_subscriber *subscriber = sub->subscriber;
/* Notify subscriber of timeout */
tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
TIPC_SUBSCR_TIMEOUT, 0, 0);
- spin_lock_bh(&subscriber->lock);
- tipc_subscrp_delete(sub);
- spin_unlock_bh(&subscriber->lock);
-
- tipc_subscrb_put(subscriber);
+ tipc_subscrp_put(sub);
}
static void tipc_subscrb_kref_release(struct kref *kref)
{
- struct tipc_subscriber *subcriber = container_of(kref,
- struct tipc_subscriber, kref);
-
- kfree(subcriber);
+ kfree(container_of(kref, struct tipc_subscriber, kref));
}
static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
@@ -168,6 +164,59 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
kref_get(&subscriber->kref);
}
+static void tipc_subscrp_kref_release(struct kref *kref)
+{
+ struct tipc_subscription *sub = container_of(kref,
+ struct tipc_subscription,
+ kref);
+ struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+ struct tipc_subscriber *subscriber = sub->subscriber;
+
+ spin_lock_bh(&subscriber->lock);
+ tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
+ atomic_dec(&tn->subscription_count);
+ spin_unlock_bh(&subscriber->lock);
+ kfree(sub);
+ tipc_subscrb_put(subscriber);
+}
+
+static void tipc_subscrp_put(struct tipc_subscription *subscription)
+{
+ kref_put(&subscription->kref, tipc_subscrp_kref_release);
+}
+
+static void tipc_subscrp_get(struct tipc_subscription *subscription)
+{
+ kref_get(&subscription->kref);
+}
+
+/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
+ * subscriptions for a given subscriber.
+ */
+static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
+ struct tipc_subscr *s)
+{
+ struct list_head *subscription_list = &subscriber->subscrp_list;
+ struct tipc_subscription *sub, *temp;
+
+ spin_lock_bh(&subscriber->lock);
+ list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
+ if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
+ continue;
+
+ tipc_subscrp_get(sub);
+ spin_unlock_bh(&subscriber->lock);
+ tipc_subscrp_delete(sub);
+ tipc_subscrp_put(sub);
+ spin_lock_bh(&subscriber->lock);
+
+ if (s)
+ break;
+ }
+ spin_unlock_bh(&subscriber->lock);
+}
+
static struct tipc_subscriber *tipc_subscrb_create(int conid)
{
struct tipc_subscriber *subscriber;
@@ -177,8 +226,8 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
pr_warn("Subscriber rejected, no memory\n");
return NULL;
}
- kref_init(&subscriber->kref);
INIT_LIST_HEAD(&subscriber->subscrp_list);
+ kref_init(&subscriber->kref);
subscriber->conid = conid;
spin_lock_init(&subscriber->lock);
@@ -187,55 +236,22 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
{
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- /* Destroy any existing subscriptions for subscriber */
- list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
- subscrp_list) {
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) {
- tipc_subscrp_delete(sub);
- tipc_subscrb_put(subscriber);
- }
- }
- spin_unlock_bh(&subscriber->lock);
-
+ tipc_subscrb_subscrp_delete(subscriber, NULL);
tipc_subscrb_put(subscriber);
}
static void tipc_subscrp_delete(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+ u32 timeout = htohl(sub->evt.s.timeout, sub->swap);
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- kfree(sub);
- atomic_dec(&tn->subscription_count);
+ if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer))
+ tipc_subscrp_put(sub);
}
static void tipc_subscrp_cancel(struct tipc_subscr *s,
struct tipc_subscriber *subscriber)
{
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- /* Find first matching subscription, exit if not found */
- list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
- subscrp_list) {
- if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if ((timeout == TIPC_WAIT_FOREVER) ||
- del_timer(&sub->timer)) {
- tipc_subscrp_delete(sub);
- tipc_subscrb_put(subscriber);
- }
- break;
- }
- }
- spin_unlock_bh(&subscriber->lock);
+ tipc_subscrb_subscrp_delete(subscriber, s);
}
static struct tipc_subscription *tipc_subscrp_create(struct net *net,
@@ -272,6 +288,7 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
sub->swap = swap;
memcpy(&sub->evt.s, s, sizeof(*s));
atomic_inc(&tn->subscription_count);
+ kref_init(&sub->kref);
return sub;
}
@@ -288,17 +305,16 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
spin_lock_bh(&subscriber->lock);
list_add(&sub->subscrp_list, &subscriber->subscrp_list);
- tipc_subscrb_get(subscriber);
sub->subscriber = subscriber;
tipc_nametbl_subscribe(sub);
+ tipc_subscrb_get(subscriber);
spin_unlock_bh(&subscriber->lock);
+ setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
timeout = htohl(sub->evt.s.timeout, swap);
- if (timeout == TIPC_WAIT_FOREVER)
- return;
- setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
- mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
+ if (timeout != TIPC_WAIT_FOREVER)
+ mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
}
/* Handle one termination request for the subscriber */
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index be60103082c9..ffdc214c117a 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -57,6 +57,7 @@ struct tipc_subscriber;
* @evt: template for events generated by subscription
*/
struct tipc_subscription {
+ struct kref kref;
struct tipc_subscriber *subscriber;
struct net *net;
struct timer_list timer;
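
The new kref member makes tipc_subscrp_kref_release() the single teardown point for a subscription: the object starts at refcount 1, each concurrent user (the timer, event reporting, the deletion paths) takes a reference, and release runs exactly once when the count reaches zero. The general pattern, reduced to a self-contained sketch:

    #include <linux/kref.h>
    #include <linux/slab.h>

    struct sub {
            struct kref kref;
            /* ... payload ... */
    };

    static void sub_release(struct kref *kref)
    {
            struct sub *s = container_of(kref, struct sub, kref);

            kfree(s);       /* all teardown happens here, exactly once */
    }

    static struct sub *sub_create(void)
    {
            struct sub *s = kzalloc(sizeof(*s), GFP_KERNEL);

            if (s)
                    kref_init(&s->kref);    /* refcount starts at 1 */
            return s;
    }

    static void sub_get(struct sub *s) { kref_get(&s->kref); }
    static void sub_put(struct sub *s) { kref_put(&s->kref, sub_release); }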
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2358f2690ec5..cef79873b09d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -100,7 +100,7 @@
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
@@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
- if (dentry && d_real_inode(dentry) == i) {
+ if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s);
goto found;
}
@@ -913,7 +913,7 @@ static struct sock *unix_find_other(struct net *net,
err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
if (err)
goto fail;
- inode = d_real_inode(path.dentry);
+ inode = d_backing_inode(path.dentry);
err = inode_permission(inode, MAY_WRITE);
if (err)
goto put_fail;
@@ -995,6 +995,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
+ struct path path = { NULL, NULL };
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
@@ -1010,9 +1011,20 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
+ if (sun_path[0]) {
+ umode_t mode = S_IFSOCK |
+ (SOCK_INODE(sock)->i_mode & ~current_umask());
+ err = unix_mknod(sun_path, mode, &path);
+ if (err) {
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
+ goto out;
+ }
+ }
+
err = mutex_lock_interruptible(&u->bindlock);
if (err)
- goto out;
+ goto out_put;
err = -EINVAL;
if (u->addr)
@@ -1029,18 +1041,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
atomic_set(&addr->refcnt, 1);
if (sun_path[0]) {
- struct path path;
- umode_t mode = S_IFSOCK |
- (SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
- if (err) {
- if (err == -EEXIST)
- err = -EADDRINUSE;
- unix_release_addr(addr);
- goto out_up;
- }
addr->hash = UNIX_HASH_SIZE;
- hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
u->path = path;
list = &unix_socket_table[hash];
@@ -1065,6 +1067,9 @@ out_unlock:
spin_unlock(&unix_table_lock);
out_up:
mutex_unlock(&u->bindlock);
+out_put:
+ if (err)
+ path_put(&path);
out:
return err;
}
@@ -2113,8 +2118,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
mutex_lock(&u->iolock);
skip = sk_peek_offset(sk, flags);
- skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
- &last);
+ skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
+ &err, &last);
if (skb)
break;
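
The unix_bind() rework moves filesystem-node creation in front of the u->bindlock critical section (unix_mknod() goes through the VFS and may sleep, which nested awkwardly under the bind lock) and unwinds it with path_put() on any later failure. A condensed outline of the new control flow, using the names visible in the hunks; dput()/mntput() inside path_put() tolerate the NULL-initialized path, so the error path is safe even when no node was created:

    /* 1. Create the socket inode first, outside any socket lock;
     *    -EEXIST becomes -EADDRINUSE, the usual bind() error.
     * 2. Take u->bindlock and do the hash-table insertion.
     * 3. On any failure after step 1, release the node's path ref.
     */
    struct path path = { NULL, NULL };

    if (sun_path[0]) {
            umode_t mode = S_IFSOCK |
                           (SOCK_INODE(sock)->i_mode & ~current_umask());

            err = unix_mknod(sun_path, mode, &path);
            if (err == -EEXIST)
                    err = -EADDRINUSE;
            if (err)
                    goto out;
    }

    err = mutex_lock_interruptible(&u->bindlock);
    if (err)
            goto out_put;
    /* ... insertion under the lock, as in the hunks above ... */

    out_put:
            if (err)
                    path_put(&path);
    out:
            return err;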
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 936d7eee62d0..6788264acc63 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -44,6 +44,10 @@ struct virtio_vsock {
spinlock_t send_pkt_list_lock;
struct list_head send_pkt_list;
+ struct work_struct loopback_work;
+ spinlock_t loopback_list_lock; /* protects loopback_list */
+ struct list_head loopback_list;
+
atomic_t queued_replies;
/* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
@@ -74,6 +78,42 @@ static u32 virtio_transport_get_local_cid(void)
return vsock->guest_cid;
}
+static void virtio_transport_loopback_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, loopback_work);
+ LIST_HEAD(pkts);
+
+ spin_lock_bh(&vsock->loopback_list_lock);
+ list_splice_init(&vsock->loopback_list, &pkts);
+ spin_unlock_bh(&vsock->loopback_list_lock);
+
+ mutex_lock(&vsock->rx_lock);
+ while (!list_empty(&pkts)) {
+ struct virtio_vsock_pkt *pkt;
+
+ pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+
+ virtio_transport_recv_pkt(pkt);
+ }
+ mutex_unlock(&vsock->rx_lock);
+}
+
+static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock,
+ struct virtio_vsock_pkt *pkt)
+{
+ int len = pkt->len;
+
+ spin_lock_bh(&vsock->loopback_list_lock);
+ list_add_tail(&pkt->list, &vsock->loopback_list);
+ spin_unlock_bh(&vsock->loopback_list_lock);
+
+ queue_work(virtio_vsock_workqueue, &vsock->loopback_work);
+
+ return len;
+}
+
static void
virtio_transport_send_pkt_work(struct work_struct *work)
{
@@ -159,6 +199,9 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
return -ENODEV;
}
+ if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
+ return virtio_transport_send_pkt_loopback(vsock, pkt);
+
if (pkt->reply)
atomic_inc(&vsock->queued_replies);
@@ -336,7 +379,7 @@ static void virtio_vsock_reset_sock(struct sock *sk)
static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
{
struct virtio_device *vdev = vsock->vdev;
- u64 guest_cid;
+ __le64 guest_cid;
vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid),
&guest_cid, sizeof(guest_cid));
@@ -510,10 +553,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
mutex_init(&vsock->event_lock);
spin_lock_init(&vsock->send_pkt_list_lock);
INIT_LIST_HEAD(&vsock->send_pkt_list);
+ spin_lock_init(&vsock->loopback_list_lock);
+ INIT_LIST_HEAD(&vsock->loopback_list);
INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
INIT_WORK(&vsock->event_work, virtio_transport_event_work);
INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+ INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work);
mutex_lock(&vsock->rx_lock);
virtio_vsock_rx_fill(vsock);
@@ -539,6 +585,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
struct virtio_vsock *vsock = vdev->priv;
struct virtio_vsock_pkt *pkt;
+ flush_work(&vsock->loopback_work);
flush_work(&vsock->rx_work);
flush_work(&vsock->tx_work);
flush_work(&vsock->event_work);
@@ -565,6 +612,15 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
}
spin_unlock_bh(&vsock->send_pkt_list_lock);
+ spin_lock_bh(&vsock->loopback_list_lock);
+ while (!list_empty(&vsock->loopback_list)) {
+ pkt = list_first_entry(&vsock->loopback_list,
+ struct virtio_vsock_pkt, list);
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+ spin_unlock_bh(&vsock->loopback_list_lock);
+
mutex_lock(&the_virtio_vsock_mutex);
the_virtio_vsock = NULL;
vsock_core_exit();
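
The loopback additions follow a standard producer/consumer shape: producers append packets to a spinlock-protected list and kick a work item, and the worker splices the whole backlog out in one locked operation before processing it with the lock dropped. A self-contained sketch with illustrative types:

    #include <linux/kernel.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/workqueue.h>

    struct pkt {
            struct list_head list;
    };

    struct loopback {
            spinlock_t lock;                /* protects list */
            struct list_head list;
            struct work_struct work;
    };

    static void consume(struct pkt *p);     /* hypothetical consumer */

    static void loopback_work(struct work_struct *work)
    {
            struct loopback *lb = container_of(work, struct loopback, work);
            LIST_HEAD(pkts);

            /* Take everything queued so far in one locked splice;
             * producers can keep appending while the worker runs.
             */
            spin_lock_bh(&lb->lock);
            list_splice_init(&lb->list, &pkts);
            spin_unlock_bh(&lb->lock);

            while (!list_empty(&pkts)) {
                    struct pkt *p = list_first_entry(&pkts, struct pkt, list);

                    list_del_init(&p->list);
                    consume(p);
            }
    }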
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a53b3a16b4f1..849c4ad0411e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -32,7 +32,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void)
return container_of(t, struct virtio_transport, transport);
}
-struct virtio_vsock_pkt *
+static struct virtio_vsock_pkt *
virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
size_t len,
u32 src_cid,
@@ -82,7 +82,6 @@ out_pkt:
kfree(pkt);
return NULL;
}
-EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt);
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info *info)
@@ -606,9 +605,9 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
return 0;
pkt = virtio_transport_alloc_pkt(&info, 0,
- le32_to_cpu(pkt->hdr.dst_cid),
+ le64_to_cpu(pkt->hdr.dst_cid),
le32_to_cpu(pkt->hdr.dst_port),
- le32_to_cpu(pkt->hdr.src_cid),
+ le64_to_cpu(pkt->hdr.src_cid),
le32_to_cpu(pkt->hdr.src_port));
if (!pkt)
return -ENOMEM;
@@ -619,17 +618,17 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
if (timeout) {
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+ add_wait_queue(sk_sleep(sk), &wait);
do {
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
if (sk_wait_event(sk, &timeout,
- sock_flag(sk, SOCK_DONE)))
+ sock_flag(sk, SOCK_DONE), &wait))
break;
} while (!signal_pending(current) && timeout);
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
}
@@ -823,7 +822,7 @@ virtio_transport_send_response(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RESPONSE,
.type = VIRTIO_VSOCK_TYPE_STREAM,
- .remote_cid = le32_to_cpu(pkt->hdr.src_cid),
+ .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
.remote_port = le32_to_cpu(pkt->hdr.src_port),
.reply = true,
};
@@ -863,9 +862,9 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
child->sk_state = SS_CONNECTED;
vchild = vsock_sk(child);
- vsock_addr_init(&vchild->local_addr, le32_to_cpu(pkt->hdr.dst_cid),
+ vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
le32_to_cpu(pkt->hdr.dst_port));
- vsock_addr_init(&vchild->remote_addr, le32_to_cpu(pkt->hdr.src_cid),
+ vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
le32_to_cpu(pkt->hdr.src_port));
vsock_insert_connected(vchild);
@@ -904,9 +903,9 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
struct sock *sk;
bool space_available;
- vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid),
+ vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
le32_to_cpu(pkt->hdr.src_port));
- vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid),
+ vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
le32_to_cpu(pkt->hdr.dst_port));
trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
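
The le32_to_cpu() to le64_to_cpu() conversions above are width fixes: the virtio-vsock packet header carries CIDs as __le64, so reading them with the 32-bit accessor truncated the upper half (and trips sparse's endianness checking). An illustrative sketch of the matching accessor per field width (field layout is hypothetical, kernel context assumed):

    #include <linux/types.h>
    #include <asm/byteorder.h>

    struct hdr {
            __le64 src_cid;         /* 64-bit context ID, little-endian */
            __le32 src_port;
    };

    static void read_hdr(const struct hdr *h, u64 *cid, u32 *port)
    {
            *cid  = le64_to_cpu(h->src_cid);   /* full 64-bit CID */
            *port = le32_to_cpu(h->src_port);
    }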
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index fd8cf0214d51..1406db4d97d1 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -662,19 +662,19 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
/* Socket control packet based operations. */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
- vmci_transport_notify_pkt_socket_init,
- vmci_transport_notify_pkt_socket_destruct,
- vmci_transport_notify_pkt_poll_in,
- vmci_transport_notify_pkt_poll_out,
- vmci_transport_notify_pkt_handle_pkt,
- vmci_transport_notify_pkt_recv_init,
- vmci_transport_notify_pkt_recv_pre_block,
- vmci_transport_notify_pkt_recv_pre_dequeue,
- vmci_transport_notify_pkt_recv_post_dequeue,
- vmci_transport_notify_pkt_send_init,
- vmci_transport_notify_pkt_send_pre_block,
- vmci_transport_notify_pkt_send_pre_enqueue,
- vmci_transport_notify_pkt_send_post_enqueue,
- vmci_transport_notify_pkt_process_request,
- vmci_transport_notify_pkt_process_negotiate,
+ .socket_init = vmci_transport_notify_pkt_socket_init,
+ .socket_destruct = vmci_transport_notify_pkt_socket_destruct,
+ .poll_in = vmci_transport_notify_pkt_poll_in,
+ .poll_out = vmci_transport_notify_pkt_poll_out,
+ .handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
+ .recv_init = vmci_transport_notify_pkt_recv_init,
+ .recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
+ .recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
+ .recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
+ .send_init = vmci_transport_notify_pkt_send_init,
+ .send_pre_block = vmci_transport_notify_pkt_send_pre_block,
+ .send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
+ .send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
+ .process_request = vmci_transport_notify_pkt_process_request,
+ .process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index 21e591dafb03..f3a0afc46208 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -420,19 +420,19 @@ vmci_transport_notify_pkt_send_pre_enqueue(
/* Socket always on control packet based operations. */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
- vmci_transport_notify_pkt_socket_init,
- vmci_transport_notify_pkt_socket_destruct,
- vmci_transport_notify_pkt_poll_in,
- vmci_transport_notify_pkt_poll_out,
- vmci_transport_notify_pkt_handle_pkt,
- vmci_transport_notify_pkt_recv_init,
- vmci_transport_notify_pkt_recv_pre_block,
- vmci_transport_notify_pkt_recv_pre_dequeue,
- vmci_transport_notify_pkt_recv_post_dequeue,
- vmci_transport_notify_pkt_send_init,
- vmci_transport_notify_pkt_send_pre_block,
- vmci_transport_notify_pkt_send_pre_enqueue,
- vmci_transport_notify_pkt_send_post_enqueue,
- vmci_transport_notify_pkt_process_request,
- vmci_transport_notify_pkt_process_negotiate,
+ .socket_init = vmci_transport_notify_pkt_socket_init,
+ .socket_destruct = vmci_transport_notify_pkt_socket_destruct,
+ .poll_in = vmci_transport_notify_pkt_poll_in,
+ .poll_out = vmci_transport_notify_pkt_poll_out,
+ .handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
+ .recv_init = vmci_transport_notify_pkt_recv_init,
+ .recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
+ .recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
+ .recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
+ .send_init = vmci_transport_notify_pkt_send_init,
+ .send_pre_block = vmci_transport_notify_pkt_send_pre_block,
+ .send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
+ .send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
+ .process_request = vmci_transport_notify_pkt_process_request,
+ .process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};
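
Both vmci notify ops tables switch from positional to designated initializers. Positional initialization silently misbinds callbacks if a member is ever added to or reordered within the ops struct; .name = fn ties each function to its slot and lets the compiler reject mismatched prototypes. A self-contained illustration:

    struct ops {
            int  (*init)(void);
            void (*exit)(void);
    };

    static int  my_init(void) { return 0; }
    static void my_exit(void) { }

    /* Fragile: correctness depends on member order never changing. */
    static const struct ops positional = { my_init, my_exit };

    /* Robust: each function is bound to the member it implements. */
    static const struct ops designated = {
            .init = my_init,
            .exit = my_exit,
    };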
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 3f816e2971ee..5db731512014 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -572,16 +572,20 @@ struct d_level D_LEVEL[] = {
size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
-struct genl_family wimax_gnl_family = {
- .id = GENL_ID_GENERATE,
+static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
+ { .name = "msg", },
+};
+
+struct genl_family wimax_gnl_family __ro_after_init = {
.name = "WiMAX",
.version = WIMAX_GNL_VERSION,
.hdrsize = 0,
.maxattr = WIMAX_GNL_ATTR_MAX,
-};
-
-static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
- { .name = "msg", },
+ .module = THIS_MODULE,
+ .ops = wimax_gnl_ops,
+ .n_ops = ARRAY_SIZE(wimax_gnl_ops),
+ .mcgrps = wimax_gnl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps),
};
@@ -596,11 +600,7 @@ int __init wimax_subsys_init(void)
d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params,
"wimax.debug");
- snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name),
- "WiMAX");
- result = genl_register_family_with_ops_groups(&wimax_gnl_family,
- wimax_gnl_ops,
- wimax_gnl_mcgrps);
+ result = genl_register_family(&wimax_gnl_family);
if (unlikely(result < 0)) {
pr_err("cannot register generic netlink family: %d\n", result);
goto error_register_family;
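
The wimax conversion reflects the generic-netlink API change in this series: a family is now described entirely by static struct fields (ops and multicast groups included) and registered with a single genl_register_family() call, and GENL_ID_GENERATE disappears because the core always assigns IDs. A minimal sketch with illustrative names:

    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <net/genetlink.h>

    static const struct genl_multicast_group my_mcgrps[] = {
            { .name = "events", },
    };

    static struct genl_family my_family __ro_after_init = {
            .name     = "my_family",
            .version  = 1,
            .hdrsize  = 0,
            .maxattr  = 0,
            .module   = THIS_MODULE,
            .mcgrps   = my_mcgrps,
            .n_mcgrps = ARRAY_SIZE(my_mcgrps),
    };

    static int __init my_subsys_init(void)
    {
            return genl_register_family(&my_family);
    }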
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 4c9e39f04ef8..816c9331c8d2 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -17,8 +17,6 @@ cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
CFLAGS_trace.o := -I$(src)
-ccflags-y += -D__CHECK_ENDIAN__
-
$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk
@$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 8201e6d7449e..158c59ecf90a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -210,11 +210,11 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE))
return;
- if (!wdev->p2p_started)
+ if (!wdev_running(wdev))
return;
rdev_stop_p2p_device(rdev, wdev);
- wdev->p2p_started = false;
+ wdev->is_running = false;
rdev->opencount--;
@@ -233,11 +233,11 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN))
return;
- if (!wdev->nan_started)
+ if (!wdev_running(wdev))
return;
rdev_stop_nan(rdev, wdev);
- wdev->nan_started = false;
+ wdev->is_running = false;
rdev->opencount--;
}
@@ -562,6 +562,21 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
c->limits[j].max > 1))
return -EINVAL;
+ /*
+ * This isn't well-defined right now. If you have an
+ * IBSS interface, then its beacon interval may change
+ * by joining other networks, and nothing prevents it
+ * from doing that.
+ * So technically we probably shouldn't even allow AP
+ * and IBSS in the same interface, but it seems that
+ * some drivers support that, possibly only with fixed
+ * beacon intervals for IBSS.
+ */
+ if (WARN_ON(types & BIT(NL80211_IFTYPE_ADHOC) &&
+ c->beacon_int_min_gcd)) {
+ return -EINVAL;
+ }
+
cnt += c->limits[j].max;
/*
* Don't advertise an unsupported type
@@ -571,6 +586,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
return -EINVAL;
}
+#ifndef CONFIG_WIRELESS_WDS
+ if (WARN_ON(all_iftypes & BIT(NL80211_IFTYPE_WDS)))
+ return -EINVAL;
+#endif
+
/* You can't even choose that many! */
if (WARN_ON(cnt < c->max_interfaces))
return -EINVAL;
@@ -609,6 +629,11 @@ int wiphy_register(struct wiphy *wiphy)
!rdev->ops->add_nan_func || !rdev->ops->del_nan_func)))
return -EINVAL;
+#ifndef CONFIG_WIRELESS_WDS
+ if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS)))
+ return -EINVAL;
+#endif
+
/*
* if a wiphy has unsupported modes for regulatory channel enforcement,
* opt-out of enforcement checking
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f0c0c8a48c92..af6e023020b1 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -346,7 +346,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
const u8 *ssid, int ssid_len,
const u8 *ie, int ie_len,
const u8 *key, int key_len, int key_idx,
- const u8 *sae_data, int sae_data_len);
+ const u8 *auth_data, int auth_data_len);
int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct ieee80211_channel *chan,
@@ -410,6 +410,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev);
void cfg80211_sme_deauth(struct wireless_dev *wdev);
void cfg80211_sme_auth_timeout(struct wireless_dev *wdev);
void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev);
+void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev);
/* internal helpers */
bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher);
@@ -476,7 +477,7 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
u32 *mask);
int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
- u32 beacon_int);
+ enum nl80211_iftype iftype, u32 beacon_int);
void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
enum nl80211_iftype iftype, int num);
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 71447cf86306..ba0a1f398ce5 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -556,7 +556,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */
memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */
break;
- case 0:
+ default:
memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */
memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */
break;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index fa2066b56f36..2d8518a37eab 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -183,6 +183,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
wdev->mesh_id_len = setup->mesh_id_len;
wdev->chandef = setup->chandef;
+ wdev->beacon_interval = setup->beacon_interval;
}
return err;
@@ -258,6 +259,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
err = rdev_leave_mesh(rdev, dev);
if (!err) {
wdev->mesh_id_len = 0;
+ wdev->beacon_interval = 0;
memset(&wdev->chandef, 0, sizeof(wdev->chandef));
rdev_set_qos_map(rdev, dev, NULL);
}
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index cbb48e26a871..4646cf5695b9 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -149,6 +149,18 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss)
}
EXPORT_SYMBOL(cfg80211_assoc_timeout);
+void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+
+ cfg80211_sme_abandon_assoc(wdev);
+
+ cfg80211_unhold_bss(bss_from_pub(bss));
+ cfg80211_put_bss(wiphy, bss);
+}
+EXPORT_SYMBOL(cfg80211_abandon_assoc);
+
void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -204,14 +216,14 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
const u8 *ssid, int ssid_len,
const u8 *ie, int ie_len,
const u8 *key, int key_len, int key_idx,
- const u8 *sae_data, int sae_data_len)
+ const u8 *auth_data, int auth_data_len)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_auth_request req = {
.ie = ie,
.ie_len = ie_len,
- .sae_data = sae_data,
- .sae_data_len = sae_data_len,
+ .auth_data = auth_data,
+ .auth_data_len = auth_data_len,
.auth_type = auth_type,
.key = key,
.key_len = key_len,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c510810f0b7c..5c1b267e22be 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -32,22 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
struct cfg80211_crypto_settings *settings,
int cipher_limit);
-static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
- struct genl_info *info);
-static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
- struct genl_info *info);
-
/* the netlink family */
-static struct genl_family nl80211_fam = {
- .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
- .name = NL80211_GENL_NAME, /* have users key off the name instead */
- .hdrsize = 0, /* no private header */
- .version = 1, /* no particular meaning now */
- .maxattr = NL80211_ATTR_MAX,
- .netnsok = true,
- .pre_doit = nl80211_pre_doit,
- .post_doit = nl80211_post_doit,
-};
+static struct genl_family nl80211_fam;
/* multicast groups */
enum nl80211_multicast_groups {
@@ -357,7 +343,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 },
[NL80211_ATTR_WDEV] = { .type = NLA_U64 },
[NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 },
- [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, },
+ [NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, },
[NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN },
[NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
[NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 },
@@ -414,6 +400,11 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
[NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 },
[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
+ [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY,
+ .len = FILS_MAX_KEK_LEN },
+ [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN },
+ [NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, },
+ [NL80211_ATTR_BSSID] = { .len = ETH_ALEN },
};
/* policy for the key attributes */
@@ -435,6 +426,7 @@ nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = {
[NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG },
};
+#ifdef CONFIG_PM
/* policy for WoWLAN attributes */
static const struct nla_policy
nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = {
@@ -468,6 +460,7 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 },
[NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 },
};
+#endif /* CONFIG_PM */
/* policy for coalesce rule attributes */
static const struct nla_policy
@@ -551,13 +544,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- nl80211_fam.attrbuf, nl80211_fam.maxattr,
- nl80211_policy);
+ genl_family_attrbuf(&nl80211_fam),
+ nl80211_fam.maxattr, nl80211_policy);
if (err)
goto out_unlock;
- *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
- nl80211_fam.attrbuf);
+ *wdev = __cfg80211_wdev_from_attrs(
+ sock_net(skb->sk),
+ genl_family_attrbuf(&nl80211_fam));
if (IS_ERR(*wdev)) {
err = PTR_ERR(*wdev);
goto out_unlock;
@@ -1075,6 +1069,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
c->radar_detect_regions)))
goto nla_put_failure;
+ if (c->beacon_int_min_gcd &&
+ nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD,
+ c->beacon_int_min_gcd))
+ goto nla_put_failure;
nla_nest_end(msg, nl_combi);
}
@@ -1322,6 +1320,95 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg,
return 0;
}
+#define CMD(op, n) \
+ do { \
+ if (rdev->ops->op) { \
+ i++; \
+ if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
+ goto nla_put_failure; \
+ } \
+ } while (0)
+
+static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev,
+ struct sk_buff *msg)
+{
+ int i = 0;
+
+ /*
+ * do *NOT* add anything into this function, new things need to be
+ * advertised only to new versions of userspace that can deal with
+ * the split (and they can't possibly care about new features...)
+ */
+ CMD(add_virtual_intf, NEW_INTERFACE);
+ CMD(change_virtual_intf, SET_INTERFACE);
+ CMD(add_key, NEW_KEY);
+ CMD(start_ap, START_AP);
+ CMD(add_station, NEW_STATION);
+ CMD(add_mpath, NEW_MPATH);
+ CMD(update_mesh_config, SET_MESH_CONFIG);
+ CMD(change_bss, SET_BSS);
+ CMD(auth, AUTHENTICATE);
+ CMD(assoc, ASSOCIATE);
+ CMD(deauth, DEAUTHENTICATE);
+ CMD(disassoc, DISASSOCIATE);
+ CMD(join_ibss, JOIN_IBSS);
+ CMD(join_mesh, JOIN_MESH);
+ CMD(set_pmksa, SET_PMKSA);
+ CMD(del_pmksa, DEL_PMKSA);
+ CMD(flush_pmksa, FLUSH_PMKSA);
+ if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
+ CMD(remain_on_channel, REMAIN_ON_CHANNEL);
+ CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
+ CMD(mgmt_tx, FRAME);
+ CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
+ if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
+ goto nla_put_failure;
+ }
+ if (rdev->ops->set_monitor_channel || rdev->ops->start_ap ||
+ rdev->ops->join_mesh) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
+ goto nla_put_failure;
+ }
+ CMD(set_wds_peer, SET_WDS_PEER);
+ if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
+ CMD(tdls_mgmt, TDLS_MGMT);
+ CMD(tdls_oper, TDLS_OPER);
+ }
+ if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+ CMD(sched_scan_start, START_SCHED_SCAN);
+ CMD(probe_client, PROBE_CLIENT);
+ CMD(set_noack_map, SET_NOACK_MAP);
+ if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
+ goto nla_put_failure;
+ }
+ CMD(start_p2p_device, START_P2P_DEVICE);
+ CMD(set_mcast_rate, SET_MCAST_RATE);
+#ifdef CONFIG_NL80211_TESTMODE
+ CMD(testmode_cmd, TESTMODE);
+#endif
+
+ if (rdev->ops->connect || rdev->ops->auth) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
+ goto nla_put_failure;
+ }
+
+ if (rdev->ops->disconnect || rdev->ops->deauth) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
+ goto nla_put_failure;
+ }
+
+ return i;
+ nla_put_failure:
+ return -ENOBUFS;
+}
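+
To make the CMD() helper above easier to follow, this is roughly what a single invocation expands to (illustrative expansion only; all names are as in the patch):

/* CMD(add_virtual_intf, NEW_INTERFACE) becomes approximately: */
do {
	if (rdev->ops->add_virtual_intf) {
		i++;
		if (nla_put_u32(msg, i, NL80211_CMD_NEW_INTERFACE))
			goto nla_put_failure;
	}
} while (0);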
+
struct nl80211_dump_wiphy_state {
s64 filter_wiphy;
long start;
@@ -1549,68 +1636,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
if (!nl_cmds)
goto nla_put_failure;
- i = 0;
-#define CMD(op, n) \
- do { \
- if (rdev->ops->op) { \
- i++; \
- if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
- goto nla_put_failure; \
- } \
- } while (0)
-
- CMD(add_virtual_intf, NEW_INTERFACE);
- CMD(change_virtual_intf, SET_INTERFACE);
- CMD(add_key, NEW_KEY);
- CMD(start_ap, START_AP);
- CMD(add_station, NEW_STATION);
- CMD(add_mpath, NEW_MPATH);
- CMD(update_mesh_config, SET_MESH_CONFIG);
- CMD(change_bss, SET_BSS);
- CMD(auth, AUTHENTICATE);
- CMD(assoc, ASSOCIATE);
- CMD(deauth, DEAUTHENTICATE);
- CMD(disassoc, DISASSOCIATE);
- CMD(join_ibss, JOIN_IBSS);
- CMD(join_mesh, JOIN_MESH);
- CMD(set_pmksa, SET_PMKSA);
- CMD(del_pmksa, DEL_PMKSA);
- CMD(flush_pmksa, FLUSH_PMKSA);
- if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
- CMD(remain_on_channel, REMAIN_ON_CHANNEL);
- CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
- CMD(mgmt_tx, FRAME);
- CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
- if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
- goto nla_put_failure;
- }
- if (rdev->ops->set_monitor_channel || rdev->ops->start_ap ||
- rdev->ops->join_mesh) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
- goto nla_put_failure;
- }
- CMD(set_wds_peer, SET_WDS_PEER);
- if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
- CMD(tdls_mgmt, TDLS_MGMT);
- CMD(tdls_oper, TDLS_OPER);
- }
- if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
- CMD(sched_scan_start, START_SCHED_SCAN);
- CMD(probe_client, PROBE_CLIENT);
- CMD(set_noack_map, SET_NOACK_MAP);
- if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
- goto nla_put_failure;
- }
- CMD(start_p2p_device, START_P2P_DEVICE);
- CMD(set_mcast_rate, SET_MCAST_RATE);
-#ifdef CONFIG_NL80211_TESTMODE
- CMD(testmode_cmd, TESTMODE);
-#endif
+ i = nl80211_add_commands_unsplit(rdev, msg);
+ if (i < 0)
+ goto nla_put_failure;
if (state->split) {
CMD(crit_proto_start, CRIT_PROTOCOL_START);
CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
@@ -1620,22 +1648,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
if (rdev->wiphy.features &
NL80211_FEATURE_SUPPORTS_WMM_ADMISSION)
CMD(add_tx_ts, ADD_TX_TS);
+ CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST);
+ CMD(update_connect_params, UPDATE_CONNECT_PARAMS);
}
- /* add into the if now */
#undef CMD
- if (rdev->ops->connect || rdev->ops->auth) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
- goto nla_put_failure;
- }
-
- if (rdev->ops->disconnect || rdev->ops->deauth) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
- goto nla_put_failure;
- }
-
nla_nest_end(msg, nl_cmds);
state->split_start++;
if (state->split)
@@ -1881,7 +1898,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
struct netlink_callback *cb,
struct nl80211_dump_wiphy_state *state)
{
- struct nlattr **tb = nl80211_fam.attrbuf;
+ struct nlattr **tb = genl_family_attrbuf(&nl80211_fam);
int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
tb, nl80211_fam.maxattr, nl80211_policy);
/* ignore parse errors for backward compatibility */
@@ -2296,10 +2313,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(nl_txq_params,
info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS],
rem_txq_params) {
- result = nla_parse(tb, NL80211_TXQ_ATTR_MAX,
- nla_data(nl_txq_params),
- nla_len(nl_txq_params),
- txq_params_policy);
+ result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX,
+ nl_txq_params,
+ txq_params_policy);
if (result)
return result;
result = parse_txq_params(tb, &txq_params);
@@ -3549,8 +3565,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
sband = rdev->wiphy.bands[band];
if (sband == NULL)
return -EINVAL;
- err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
- nla_len(tx_rates), nl80211_txattr_policy);
+ err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates,
+ nl80211_txattr_policy);
if (err)
return err;
if (tb[NL80211_TXRATE_LEGACY]) {
@@ -3756,12 +3772,23 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
auth_type == NL80211_AUTHTYPE_SAE)
return false;
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_FILS_STA) &&
+ (auth_type == NL80211_AUTHTYPE_FILS_SK ||
+ auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+ auth_type == NL80211_AUTHTYPE_FILS_PK))
+ return false;
return true;
case NL80211_CMD_CONNECT:
case NL80211_CMD_START_AP:
/* SAE not supported yet */
if (auth_type == NL80211_AUTHTYPE_SAE)
return false;
+ /* FILS not supported yet */
+ if (auth_type == NL80211_AUTHTYPE_FILS_SK ||
+ auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+ auth_type == NL80211_AUTHTYPE_FILS_PK)
+ return false;
return true;
default:
return false;
@@ -3803,7 +3830,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
params.dtim_period =
nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
- err = cfg80211_validate_beacon_int(rdev, params.beacon_interval);
+ err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype,
+ params.beacon_interval);
if (err)
return err;
@@ -4587,6 +4615,15 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
break;
}
+ /*
+ * Older kernel versions ignored this attribute entirely, so don't
+ * reject attempts to update it but mark it as unused instead so the
+ * driver won't look at the data.
+ */
+ if (statype != CFG80211_STA_AP_CLIENT_UNASSOC &&
+ statype != CFG80211_STA_TDLS_PEER_SETUP)
+ params->opmode_notif_used = false;
+
return 0;
}
EXPORT_SYMBOL(cfg80211_check_station_change);
@@ -4826,6 +4863,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
params.local_pm = pm;
}
+ if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
+ params.opmode_notif_used = true;
+ params.opmode_notif =
+ nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
+ }
+
/* Include parameters for TDLS peer (will check later) */
err = nl80211_set_station_tdls(info, &params);
if (err)
@@ -6305,9 +6348,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
rem_reg_rules) {
- r = nla_parse(tb, NL80211_REG_RULE_ATTR_MAX,
- nla_data(nl_reg_rule), nla_len(nl_reg_rule),
- reg_rule_policy);
+ r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX,
+ nl_reg_rule, reg_rule_policy);
if (r)
goto bad_reg;
r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
@@ -6374,8 +6416,8 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy,
if (!nla_ok(nest, nla_len(nest)))
return -EINVAL;
- err = nla_parse(attr, NL80211_BSS_SELECT_ATTR_MAX, nla_data(nest),
- nla_len(nest), nl80211_bss_select_policy);
+ err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest,
+ nl80211_bss_select_policy);
if (err)
return err;
@@ -6677,7 +6719,20 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
request->no_cck =
nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
- if (info->attrs[NL80211_ATTR_MAC])
+ /* Initial implementation used NL80211_ATTR_MAC to set the specific
+ * BSSID to scan for. This was problematic because that same attribute
+ * was already used for another purpose (local random MAC address). The
+ * NL80211_ATTR_BSSID attribute was added to fix this. For backwards
+ * compatibility with older userspace components, also use the
+ * NL80211_ATTR_MAC value here if it can be determined to be used for
+ * the specific BSSID use case instead of the random MAC address
+ * (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use).
+ */
+ if (info->attrs[NL80211_ATTR_BSSID])
+ memcpy(request->bssid,
+ nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN);
+ else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) &&
+ info->attrs[NL80211_ATTR_MAC])
memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]),
ETH_ALEN);
else
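For context, a minimal userspace sketch of scanning for a specific BSSID via the new attribute, using libnl (a hedged illustration, not part of the patch; the socket is assumed to be allocated and genl_connect()ed, and error handling is elided):

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>
#include <linux/if_ether.h>	/* ETH_ALEN */

static int trigger_bssid_scan(struct nl_sock *sk, int ifindex,
			      const unsigned char bssid[ETH_ALEN])
{
	int fam = genl_ctrl_resolve(sk, "nl80211");
	struct nl_msg *msg = nlmsg_alloc();
	int err;

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, fam, 0, 0,
		    NL80211_CMD_TRIGGER_SCAN, 0);
	nla_put_u32(msg, NL80211_ATTR_IFINDEX, ifindex);
	/* new attribute; older kernels fall back to NL80211_ATTR_MAC
	 * when random-MAC scanning is not requested */
	nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, bssid);

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
	return err < 0 ? err : 0;
}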
@@ -6765,9 +6820,8 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
if (WARN_ON(i >= n_plans))
return -EINVAL;
- err = nla_parse(plan, NL80211_SCHED_SCAN_PLAN_MAX,
- nla_data(attr), nla_len(attr),
- nl80211_plan_policy);
+ err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX,
+ attr, nl80211_plan_policy);
if (err)
return err;
@@ -6856,9 +6910,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
tmp) {
struct nlattr *rssi;
- err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
- nla_data(attr), nla_len(attr),
- nl80211_match_policy);
+ err = nla_parse_nested(tb,
+ NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+ attr, nl80211_match_policy);
if (err)
return ERR_PTR(err);
/* add other standalone attributes here */
@@ -7029,9 +7083,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
tmp) {
struct nlattr *ssid, *rssi;
- err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
- nla_data(attr), nla_len(attr),
- nl80211_match_policy);
+ err = nla_parse_nested(tb,
+ NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+ attr, nl80211_match_policy);
if (err)
goto out_free;
ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
@@ -7643,6 +7697,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
struct survey_info survey;
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
@@ -7655,7 +7710,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
return res;
/* prepare_wdev_dump parsed the attributes */
- radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
+ radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
if (!wdev->netdev) {
res = -EINVAL;
@@ -7708,8 +7763,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct ieee80211_channel *chan;
- const u8 *bssid, *ssid, *ie = NULL, *sae_data = NULL;
- int err, ssid_len, ie_len = 0, sae_data_len = 0;
+ const u8 *bssid, *ssid, *ie = NULL, *auth_data = NULL;
+ int err, ssid_len, ie_len = 0, auth_data_len = 0;
enum nl80211_auth_type auth_type;
struct key_parse key;
bool local_state_change;
@@ -7789,17 +7844,23 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE))
return -EINVAL;
- if (auth_type == NL80211_AUTHTYPE_SAE &&
- !info->attrs[NL80211_ATTR_SAE_DATA])
+ if ((auth_type == NL80211_AUTHTYPE_SAE ||
+ auth_type == NL80211_AUTHTYPE_FILS_SK ||
+ auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+ auth_type == NL80211_AUTHTYPE_FILS_PK) &&
+ !info->attrs[NL80211_ATTR_AUTH_DATA])
return -EINVAL;
- if (info->attrs[NL80211_ATTR_SAE_DATA]) {
- if (auth_type != NL80211_AUTHTYPE_SAE)
+ if (info->attrs[NL80211_ATTR_AUTH_DATA]) {
+ if (auth_type != NL80211_AUTHTYPE_SAE &&
+ auth_type != NL80211_AUTHTYPE_FILS_SK &&
+ auth_type != NL80211_AUTHTYPE_FILS_SK_PFS &&
+ auth_type != NL80211_AUTHTYPE_FILS_PK)
return -EINVAL;
- sae_data = nla_data(info->attrs[NL80211_ATTR_SAE_DATA]);
- sae_data_len = nla_len(info->attrs[NL80211_ATTR_SAE_DATA]);
+ auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
+ auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
/* need to include at least Auth Transaction and Status Code */
- if (sae_data_len < 4)
+ if (auth_data_len < 4)
return -EINVAL;
}
@@ -7816,7 +7877,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
ssid, ssid_len, ie, ie_len,
key.p.key, key.p.key_len, key.idx,
- sae_data, sae_data_len);
+ auth_data, auth_data_len);
wdev_unlock(dev->ieee80211_ptr);
return err;
}
@@ -7995,6 +8056,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
req.flags |= ASSOC_REQ_USE_RRM;
}
+ if (info->attrs[NL80211_ATTR_FILS_KEK]) {
+ req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]);
+ req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]);
+ if (!info->attrs[NL80211_ATTR_FILS_NONCES])
+ return -EINVAL;
+ req.fils_nonces =
+ nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]);
+ }
+
err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
if (!err) {
wdev_lock(dev->ieee80211_ptr);
@@ -8152,7 +8222,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
ibss.beacon_interval =
nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval);
+ err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_ADHOC,
+ ibss.beacon_interval);
if (err)
return err;
@@ -8478,14 +8549,14 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
*/
phy_idx = cb->args[0] - 1;
} else {
+ struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- nl80211_fam.attrbuf, nl80211_fam.maxattr,
- nl80211_policy);
+ attrbuf, nl80211_fam.maxattr, nl80211_policy);
if (err)
goto out_err;
- rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
- nl80211_fam.attrbuf);
+ rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
if (IS_ERR(rdev)) {
err = PTR_ERR(rdev);
goto out_err;
@@ -8493,9 +8564,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
phy_idx = rdev->wiphy_idx;
rdev = NULL;
- if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA])
- cb->args[1] =
- (long)nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA];
+ if (attrbuf[NL80211_ATTR_TESTDATA])
+ cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA];
}
if (cb->args[1]) {
@@ -8726,6 +8796,37 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
return err;
}
+static int nl80211_update_connect_params(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_connect_params connect = {};
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ u32 changed = 0;
+ int ret;
+
+ if (!rdev->ops->update_connect_params)
+ return -EOPNOTSUPP;
+
+ if (info->attrs[NL80211_ATTR_IE]) {
+ if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
+ return -EINVAL;
+ connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+ connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+ changed |= UPDATE_ASSOC_IES;
+ }
+
+ wdev_lock(dev->ieee80211_ptr);
+ if (!wdev->current_bss)
+ ret = -ENOLINK;
+ else
+ ret = rdev_update_connect_params(rdev, dev, &connect, changed);
+ wdev_unlock(dev->ieee80211_ptr);
+
+ return ret;
+}
+
static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -9417,7 +9518,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
setup.beacon_interval =
nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval);
+ err = cfg80211_validate_beacon_int(rdev,
+ NL80211_IFTYPE_MESH_POINT,
+ setup.beacon_interval);
if (err)
return err;
}
@@ -9728,9 +9831,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
if (!rdev->wiphy.wowlan->tcp)
return -EINVAL;
- err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP,
- nla_data(attr), nla_len(attr),
- nl80211_wowlan_tcp_policy);
+ err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr,
+ nl80211_wowlan_tcp_policy);
if (err)
return err;
@@ -9875,9 +9977,7 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev,
goto out;
}
- err = nla_parse(tb, NL80211_ATTR_MAX,
- nla_data(attr), nla_len(attr),
- nl80211_policy);
+ err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy);
if (err)
goto out;
@@ -9911,10 +10011,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
goto set_wakeup;
}
- err = nla_parse(tb, MAX_NL80211_WOWLAN_TRIG,
- nla_data(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]),
- nla_len(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]),
- nl80211_wowlan_policy);
+ err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG,
+ info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS],
+ nl80211_wowlan_policy);
if (err)
return err;
@@ -9996,8 +10095,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
rem) {
u8 *mask_pat;
- nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
- nla_len(pat), NULL);
+ nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+ NULL);
err = -EINVAL;
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10207,8 +10306,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
int rem, pat_len, mask_len, pkt_offset, n_patterns = 0;
struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
- err = nla_parse(tb, NL80211_ATTR_COALESCE_RULE_MAX, nla_data(rule),
- nla_len(rule), nl80211_coalesce_policy);
+ err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule,
+ nl80211_coalesce_policy);
if (err)
return err;
@@ -10246,8 +10345,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
rem) {
u8 *mask_pat;
- nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
- nla_len(pat), NULL);
+ nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL);
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
return -EINVAL;
@@ -10366,10 +10464,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NL80211_ATTR_REKEY_DATA])
return -EINVAL;
- err = nla_parse(tb, MAX_NL80211_REKEY_DATA,
- nla_data(info->attrs[NL80211_ATTR_REKEY_DATA]),
- nla_len(info->attrs[NL80211_ATTR_REKEY_DATA]),
- nl80211_rekey_policy);
+ err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA,
+ info->attrs[NL80211_ATTR_REKEY_DATA],
+ nl80211_rekey_policy);
if (err)
return err;
@@ -10518,7 +10615,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
return -EOPNOTSUPP;
- if (wdev->p2p_started)
+ if (wdev_running(wdev))
return 0;
if (rfkill_blocked(rdev->rfkill))
@@ -10528,7 +10625,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- wdev->p2p_started = true;
+ wdev->is_running = true;
rdev->opencount++;
return 0;
@@ -10560,7 +10657,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
if (wdev->iftype != NL80211_IFTYPE_NAN)
return -EOPNOTSUPP;
- if (wdev->nan_started)
+ if (wdev_running(wdev))
return -EEXIST;
if (rfkill_blocked(rdev->rfkill))
@@ -10583,7 +10680,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- wdev->nan_started = true;
+ wdev->is_running = true;
rdev->opencount++;
return 0;
@@ -10638,8 +10735,7 @@ static int handle_nan_filter(struct nlattr *attr_filter,
i = 0;
nla_for_each_nested(attr, attr_filter, rem) {
- filter[i].filter = kmemdup(nla_data(attr), nla_len(attr),
- GFP_KERNEL);
+ filter[i].filter = nla_memdup(attr, GFP_KERNEL);
filter[i].len = nla_len(attr);
i++;
}
@@ -10668,7 +10764,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
if (wdev->iftype != NL80211_IFTYPE_NAN)
return -EOPNOTSUPP;
- if (!wdev->nan_started)
+ if (!wdev_running(wdev))
return -ENOTCONN;
if (!info->attrs[NL80211_ATTR_NAN_FUNC])
@@ -10678,10 +10774,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
wdev->owner_nlportid != info->snd_portid)
return -ENOTCONN;
- err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX,
- nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]),
- nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]),
- nl80211_nan_func_policy);
+ err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX,
+ info->attrs[NL80211_ATTR_NAN_FUNC],
+ nl80211_nan_func_policy);
if (err)
return err;
@@ -10776,9 +10871,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
if (tb[NL80211_NAN_FUNC_SRF]) {
struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR];
- err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
- nla_data(tb[NL80211_NAN_FUNC_SRF]),
- nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL);
+ err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
+ tb[NL80211_NAN_FUNC_SRF],
+ nl80211_nan_srf_policy);
if (err)
goto out;
@@ -10904,7 +10999,7 @@ static int nl80211_nan_del_func(struct sk_buff *skb,
if (wdev->iftype != NL80211_IFTYPE_NAN)
return -EOPNOTSUPP;
- if (!wdev->nan_started)
+ if (!wdev_running(wdev))
return -ENOTCONN;
if (!info->attrs[NL80211_ATTR_COOKIE])
@@ -10932,7 +11027,7 @@ static int nl80211_nan_change_config(struct sk_buff *skb,
if (wdev->iftype != NL80211_IFTYPE_NAN)
return -EOPNOTSUPP;
- if (!wdev->nan_started)
+ if (!wdev_running(wdev))
return -ENOTCONN;
if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) {
@@ -11244,10 +11339,7 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
- if (wdev->netdev &&
- !netif_running(wdev->netdev))
- return -ENETDOWN;
- if (!wdev->netdev && !wdev->p2p_started)
+ if (!wdev_running(wdev))
return -ENETDOWN;
}
@@ -11277,6 +11369,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
struct cfg80211_registered_device **rdev,
struct wireless_dev **wdev)
{
+ struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
u32 vid, subcmd;
unsigned int i;
int vcmd_idx = -1;
@@ -11312,31 +11405,28 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
}
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- nl80211_fam.attrbuf, nl80211_fam.maxattr,
- nl80211_policy);
+ attrbuf, nl80211_fam.maxattr, nl80211_policy);
if (err)
goto out_unlock;
- if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] ||
- !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
+ if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
+ !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
err = -EINVAL;
goto out_unlock;
}
- *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
- nl80211_fam.attrbuf);
+ *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
if (IS_ERR(*wdev))
*wdev = NULL;
- *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
- nl80211_fam.attrbuf);
+ *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
if (IS_ERR(*rdev)) {
err = PTR_ERR(*rdev);
goto out_unlock;
}
- vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]);
- subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
+ vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
+ subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) {
const struct wiphy_vendor_command *vcmd;
@@ -11360,9 +11450,9 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
goto out_unlock;
}
- if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) {
- data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
- data_len = nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
+ if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
+ data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
+ data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]);
}
/* 0 is the first index - add 1 to parse only once */
@@ -11410,10 +11500,7 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
return -EINVAL;
if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
- if (wdev->netdev &&
- !netif_running(wdev->netdev))
- return -ENETDOWN;
- if (!wdev->netdev && !wdev->p2p_started)
+ if (!wdev_running(wdev))
return -ENETDOWN;
}
}
@@ -11726,6 +11813,31 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb,
return 0;
}
+static int nl80211_set_multicast_to_unicast(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ const struct nlattr *nla;
+ bool enabled;
+
+ if (netif_running(dev))
+ return -EBUSY;
+
+ if (!rdev->ops->set_multicast_to_unicast)
+ return -EOPNOTSUPP;
+
+ if (wdev->iftype != NL80211_IFTYPE_AP &&
+ wdev->iftype != NL80211_IFTYPE_P2P_GO)
+ return -EOPNOTSUPP;
+
+ nla = info->attrs[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED];
+ enabled = nla_get_flag(nla);
+
+ return rdev_set_multicast_to_unicast(rdev, dev, enabled);
+}
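+
A similarly hedged userspace sketch for the new toggle, assuming the same libnl setup (sk, fam, ifindex) as the scan sketch earlier; note the kernel-side handler above requires the interface to be down and in AP or P2P-GO mode:

struct nl_msg *msg = nlmsg_alloc();
genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, fam, 0, 0,
	    NL80211_CMD_SET_MULTICAST_TO_UNICAST, 0);
nla_put_u32(msg, NL80211_ATTR_IFINDEX, ifindex);
/* flag attribute: present means enabled, absent means disabled */
nla_put_flag(msg, NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED);
nl_send_auto(sk, msg);
nlmsg_free(msg);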
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -11784,29 +11896,15 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
info->user_ptr[1] = wdev;
}
- if (dev) {
- if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
- !netif_running(dev)) {
- if (rtnl)
- rtnl_unlock();
- return -ENETDOWN;
- }
+ if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
+ !wdev_running(wdev)) {
+ if (rtnl)
+ rtnl_unlock();
+ return -ENETDOWN;
+ }
+ if (dev)
dev_hold(dev);
- } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
- if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE &&
- !wdev->p2p_started) {
- if (rtnl)
- rtnl_unlock();
- return -ENETDOWN;
- }
- if (wdev->iftype == NL80211_IFTYPE_NAN &&
- !wdev->nan_started) {
- if (rtnl)
- rtnl_unlock();
- return -ENETDOWN;
- }
- }
info->user_ptr[0] = rdev;
}
@@ -12179,6 +12277,14 @@ static const struct genl_ops nl80211_ops[] = {
NL80211_FLAG_NEED_RTNL,
},
{
+ .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
+ .doit = nl80211_update_connect_params,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
.cmd = NL80211_CMD_DISCONNECT,
.doit = nl80211_disconnect,
.policy = nl80211_policy,
@@ -12599,6 +12705,29 @@ static const struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST,
+ .doit = nl80211_set_multicast_to_unicast,
+ .policy = nl80211_policy,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_NEED_RTNL,
+ },
+};
+
+static struct genl_family nl80211_fam __ro_after_init = {
+ .name = NL80211_GENL_NAME, /* have users key off the name instead */
+ .hdrsize = 0, /* no private header */
+ .version = 1, /* no particular meaning now */
+ .maxattr = NL80211_ATTR_MAX,
+ .netnsok = true,
+ .pre_doit = nl80211_pre_doit,
+ .post_doit = nl80211_post_doit,
+ .module = THIS_MODULE,
+ .ops = nl80211_ops,
+ .n_ops = ARRAY_SIZE(nl80211_ops),
+ .mcgrps = nl80211_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps),
};
/* notification functions */
@@ -14388,13 +14517,17 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
bool schedule_destroy_work = false;
- bool schedule_scan_stop = false;
struct cfg80211_sched_scan_request *sched_scan_req =
rcu_dereference(rdev->sched_scan_req);
if (sched_scan_req && notify->portid &&
- sched_scan_req->owner_nlportid == notify->portid)
- schedule_scan_stop = true;
+ sched_scan_req->owner_nlportid == notify->portid) {
+ sched_scan_req->owner_nlportid = 0;
+
+ if (rdev->ops->sched_scan_stop &&
+ rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+ schedule_work(&rdev->sched_scan_stop_wk);
+ }
list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) {
cfg80211_mlme_unregister_socket(wdev, notify->portid);
@@ -14425,12 +14558,6 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
spin_unlock(&rdev->destroy_list_lock);
schedule_work(&rdev->destroy_work);
}
- } else if (schedule_scan_stop) {
- sched_scan_req->owner_nlportid = 0;
-
- if (rdev->ops->sched_scan_stop &&
- rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
- schedule_work(&rdev->sched_scan_stop_wk);
}
}
@@ -14563,12 +14690,11 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
/* initialisation/exit functions */
-int nl80211_init(void)
+int __init nl80211_init(void)
{
int err;
- err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops,
- nl80211_mcgrps);
+ err = genl_register_family(&nl80211_fam);
if (err)
return err;
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 11cf83c8ad4f..2f425075ada8 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -490,6 +490,18 @@ static inline int rdev_connect(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int
+rdev_update_connect_params(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_connect_params *sme, u32 changed)
+{
+ int ret;
+ trace_rdev_update_connect_params(&rdev->wiphy, dev, sme, changed);
+ ret = rdev->ops->update_connect_params(&rdev->wiphy, dev, sme, changed);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
static inline int rdev_disconnect(struct cfg80211_registered_device *rdev,
struct net_device *dev, u16 reason_code)
{
@@ -562,6 +574,18 @@ static inline int rdev_set_wds_peer(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int
+rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ const bool enabled)
+{
+ int ret;
+ trace_rdev_set_multicast_to_unicast(&rdev->wiphy, dev, enabled);
+ ret = rdev->ops->set_multicast_to_unicast(&rdev->wiphy, dev, enabled);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev)
{
trace_rdev_rfkill_poll(&rdev->wiphy);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index a77db333927e..5e0d19380302 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -39,6 +39,7 @@ struct cfg80211_conn {
CFG80211_CONN_ASSOCIATING,
CFG80211_CONN_ASSOC_FAILED,
CFG80211_CONN_DEAUTH,
+ CFG80211_CONN_ABANDON,
CFG80211_CONN_CONNECTED,
} state;
u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
@@ -206,6 +207,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
NULL, 0,
WLAN_REASON_DEAUTH_LEAVING, false);
+ /* fall through */
+ case CFG80211_CONN_ABANDON:
/* free directly, disconnected event already sent */
cfg80211_sme_free(wdev);
return 0;
@@ -423,6 +426,17 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev)
schedule_work(&rdev->conn_work);
}
+void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+
+ if (!wdev->conn)
+ return;
+
+ wdev->conn->state = CFG80211_CONN_ABANDON;
+ schedule_work(&rdev->conn_work);
+}
+
static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev,
const u8 *ies, size_t ies_len,
const u8 **out_ies, size_t *out_ies_len)
@@ -1088,7 +1102,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
err = cfg80211_sme_disconnect(wdev, reason);
else if (!rdev->ops->disconnect)
cfg80211_mlme_down(rdev, dev);
- else if (wdev->current_bss)
+ else if (wdev->ssid_len)
err = rdev_disconnect(rdev, dev, reason);
return err;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index a3d0a91b1e09..ea1b47e04fa4 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1281,6 +1281,24 @@ TRACE_EVENT(rdev_connect,
__entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid))
);
+TRACE_EVENT(rdev_update_connect_params,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_connect_params *sme, u32 changed),
+ TP_ARGS(wiphy, netdev, sme, changed),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(u32, changed)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->changed = changed;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", parameters changed: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->changed)
+);
+
TRACE_EVENT(rdev_set_cqm_rssi_config,
TP_PROTO(struct wiphy *wiphy,
struct net_device *netdev, s32 rssi_thold,
@@ -3030,6 +3048,25 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan,
TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
TP_ARGS(wiphy, wdev)
);
+
+TRACE_EVENT(rdev_set_multicast_to_unicast,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ const bool enabled),
+ TP_ARGS(wiphy, netdev, enabled),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(bool, enabled)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->enabled = enabled;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ BOOL_TO_STR(__entry->enabled))
+);
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 659b507b347d..e9d040d29846 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -13,6 +13,7 @@
#include <net/dsfield.h>
#include <linux/if_vlan.h>
#include <linux/mpls.h>
+#include <linux/gcd.h>
#include "core.h"
#include "rdev-ops.h"
@@ -1378,6 +1379,25 @@ static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
return false;
}
+static size_t skip_ie(const u8 *ies, size_t ielen, size_t pos)
+{
+ /* we assume a validly formed IEs buffer */
+ u8 len = ies[pos + 1];
+
+ pos += 2 + len;
+
+ /* fragments may only follow an element of maximum length (255 bytes) */
+ if (len < 255)
+ return pos;
+
+ while (pos < ielen && ies[pos] == WLAN_EID_FRAGMENT) {
+ len = ies[pos + 1];
+ pos += 2 + len;
+ }
+
+ return pos;
+}
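+
A standalone worked example of the fragment handling above (illustrative only; WLAN_EID_FRAGMENT is 242 per IEEE 802.11, and the buffer is assumed validly formed, as in the patch):

#include <stdio.h>

#define WLAN_EID_FRAGMENT 242

static size_t skip_ie(const unsigned char *ies, size_t ielen, size_t pos)
{
	unsigned char len = ies[pos + 1];

	pos += 2 + len;
	if (len < 255)
		return pos;
	while (pos < ielen && ies[pos] == WLAN_EID_FRAGMENT) {
		len = ies[pos + 1];
		pos += 2 + len;
	}
	return pos;
}

int main(void)
{
	/* a maximum-length element followed by one 20-byte fragment */
	unsigned char buf[2 + 255 + 2 + 20] = { 0 };

	buf[0] = 1;			/* some element ID */
	buf[1] = 255;			/* max length: fragments may follow */
	buf[2 + 255] = WLAN_EID_FRAGMENT;
	buf[2 + 255 + 1] = 20;		/* fragment carries 20 more bytes */

	/* prints 279: the element and its fragment are skipped together */
	printf("%zu\n", skip_ie(buf, sizeof(buf), 0));
	return 0;
}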
+
size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
const u8 *ids, int n_ids,
const u8 *after_ric, int n_after_ric,
@@ -1387,14 +1407,14 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
- pos += 2 + ies[pos + 1];
+ pos = skip_ie(ies, ielen, pos);
while (pos < ielen &&
!ieee80211_id_in_list(after_ric, n_after_ric,
ies[pos]))
- pos += 2 + ies[pos + 1];
+ pos = skip_ie(ies, ielen, pos);
} else {
- pos += 2 + ies[pos + 1];
+ pos = skip_ie(ies, ielen, pos);
}
}
@@ -1555,31 +1575,57 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
}
EXPORT_SYMBOL(ieee80211_chandef_to_operating_class);
-int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
- u32 beacon_int)
+static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int,
+ u32 *beacon_int_gcd,
+ bool *beacon_int_different)
{
struct wireless_dev *wdev;
- int res = 0;
- if (beacon_int < 10 || beacon_int > 10000)
- return -EINVAL;
+ *beacon_int_gcd = 0;
+ *beacon_int_different = false;
- list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+ list_for_each_entry(wdev, &wiphy->wdev_list, list) {
if (!wdev->beacon_interval)
continue;
- if (wdev->beacon_interval != beacon_int) {
- res = -EINVAL;
- break;
+
+ if (!*beacon_int_gcd) {
+ *beacon_int_gcd = wdev->beacon_interval;
+ continue;
}
+
+ if (wdev->beacon_interval == *beacon_int_gcd)
+ continue;
+
+ *beacon_int_different = true;
+ *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval);
}
- return res;
+ if (new_beacon_int && *beacon_int_gcd != new_beacon_int) {
+ if (*beacon_int_gcd)
+ *beacon_int_different = true;
+ *beacon_int_gcd = gcd(*beacon_int_gcd, new_beacon_int);
+ }
+}
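+
A minimal standalone sketch of the GCD bookkeeping above, assuming (hypothetically) two running interfaces beaconing at 100 and 150 TU plus a new interface being brought up at 200 TU:

#include <stdio.h>

static unsigned int gcd(unsigned int a, unsigned int b)
{
	while (b) {
		unsigned int t = a % b;

		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	unsigned int intervals[] = { 100, 150 };	/* existing wdevs, in TU */
	unsigned int new_bi = 200;			/* interface being added */
	unsigned int g = 0;
	int different = 0;
	int i;

	for (i = 0; i < 2; i++) {
		if (!g) {
			g = intervals[i];
			continue;
		}
		if (intervals[i] == g)
			continue;
		different = 1;
		g = gcd(g, intervals[i]);
	}
	if (new_bi && g != new_bi) {
		if (g)
			different = 1;
		g = gcd(g, new_bi);
	}

	/* prints "gcd=50 different=1": only combinations advertising
	 * beacon_int_min_gcd <= 50 remain acceptable */
	printf("gcd=%u different=%d\n", g, different);
	return 0;
}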
+
+int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
+ enum nl80211_iftype iftype, u32 beacon_int)
+{
+ /*
+ * This is just a basic pre-condition check; if interface combinations
+ * are possible the driver must already be checking those with a call
+ * to cfg80211_check_combinations(), in which case we'll validate more
+ * through the cfg80211_calculate_bi_data() call and code in
+ * cfg80211_iter_combinations().
+ */
+
+ if (beacon_int < 10 || beacon_int > 10000)
+ return -EINVAL;
+
+ return 0;
}
int cfg80211_iter_combinations(struct wiphy *wiphy,
- const int num_different_channels,
- const u8 radar_detect,
- const int iftype_num[NUM_NL80211_IFTYPES],
+ struct iface_combination_params *params,
void (*iter)(const struct ieee80211_iface_combination *c,
void *data),
void *data)
@@ -1589,8 +1635,23 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
int i, j, iftype;
int num_interfaces = 0;
u32 used_iftypes = 0;
+ u32 beacon_int_gcd;
+ bool beacon_int_different;
+
+ /*
+ * This is a bit strange, since the iteration used to rely only on
+ * the data given by the driver, but here it now relies on context,
+ * in form of the currently operating interfaces.
+ * This is OK for all current users, and saves us from having to
+ * push the GCD calculations into all the drivers.
+ * In the future, this should probably rely more on data that's in
+ * cfg80211 already - the only things not already tracked there would
+ * appear to be any new interfaces (while being brought up) and
+ * channel/radar data.
+ */
+ cfg80211_calculate_bi_data(wiphy, params->new_beacon_int,
+ &beacon_int_gcd, &beacon_int_different);
- if (radar_detect) {
+ if (params->radar_detect) {
rcu_read_lock();
regdom = rcu_dereference(cfg80211_regdomain);
if (regdom)
@@ -1599,8 +1660,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
}
for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
- num_interfaces += iftype_num[iftype];
- if (iftype_num[iftype] > 0 &&
+ num_interfaces += params->iftype_num[iftype];
+ if (params->iftype_num[iftype] > 0 &&
!(wiphy->software_iftypes & BIT(iftype)))
used_iftypes |= BIT(iftype);
}
@@ -1614,7 +1675,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
if (num_interfaces > c->max_interfaces)
continue;
- if (num_different_channels > c->num_different_channels)
+ if (params->num_different_channels > c->num_different_channels)
continue;
limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
@@ -1629,16 +1690,17 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
all_iftypes |= limits[j].types;
if (!(limits[j].types & BIT(iftype)))
continue;
- if (limits[j].max < iftype_num[iftype])
+ if (limits[j].max < params->iftype_num[iftype])
goto cont;
- limits[j].max -= iftype_num[iftype];
+ limits[j].max -= params->iftype_num[iftype];
}
}
- if (radar_detect != (c->radar_detect_widths & radar_detect))
+ if (params->radar_detect !=
+ (c->radar_detect_widths & params->radar_detect))
goto cont;
- if (radar_detect && c->radar_detect_regions &&
+ if (params->radar_detect && c->radar_detect_regions &&
!(c->radar_detect_regions & BIT(region)))
goto cont;
@@ -1650,6 +1712,14 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
if ((all_iftypes & used_iftypes) != used_iftypes)
goto cont;
+ if (beacon_int_gcd) {
+ if (c->beacon_int_min_gcd &&
+ beacon_int_gcd < c->beacon_int_min_gcd)
+ goto cont;
+ if (!c->beacon_int_min_gcd && beacon_int_different)
+ goto cont;
+ }
+
/* This combination covered all interface types and
* supported the requested numbers, so we're good.
*/
@@ -1672,14 +1742,11 @@ cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c,
}
int cfg80211_check_combinations(struct wiphy *wiphy,
- const int num_different_channels,
- const u8 radar_detect,
- const int iftype_num[NUM_NL80211_IFTYPES])
+ struct iface_combination_params *params)
{
int err, num = 0;
- err = cfg80211_iter_combinations(wiphy, num_different_channels,
- radar_detect, iftype_num,
+ err = cfg80211_iter_combinations(wiphy, params,
cfg80211_iter_sum_ifcombs, &num);
if (err)
return err;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f83b74d3e2ac..079c883aa96e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -51,7 +51,7 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <net/tcp_states.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/fcntl.h>
#include <linux/termios.h> /* For TIOCINQ/OUTQ */
#include <linux/notifier.h>
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index 43239527a205..a06dfe143c67 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -70,7 +70,7 @@ static struct ctl_table x25_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { 0, },
+ { },
};
void __init x25_register_sysctl(void)
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index fd5ffb25873f..bcaa180d6a3f 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -29,7 +29,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/init.h>
#include <net/x25.h>
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5bf7e1bfeac7..177e208e8ff5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3113,6 +3113,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
void __init xfrm_init(void)
{
+ flow_cache_hp_init();
register_pernet_subsys(&xfrm_net_ops);
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 419bf5d463bd..64e3c82eedf6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -20,7 +20,7 @@
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/ktime.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
@@ -388,14 +388,6 @@ static void xfrm_state_gc_task(struct work_struct *work)
xfrm_state_gc_destroy(x);
}
-static inline unsigned long make_jiffies(long secs)
-{
- if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
- return MAX_SCHEDULE_TIMEOUT-1;
- else
- return secs*HZ;
-}
-
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
{
struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
@@ -1412,7 +1404,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {
x->km.state = XFRM_STATE_EXPIRED;
- tasklet_hrtimer_start(&x->mtimer, ktime_set(0, 0), HRTIMER_MODE_REL);
+ tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
return -EINVAL;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 671a1d0333f0..9705c279494b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -27,7 +27,7 @@
#include <net/xfrm.h>
#include <net/netlink.h>
#include <net/ah.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/in6.h>
#endif