summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c21
-rw-r--r--net/8021q/vlan.h3
-rw-r--r--net/8021q/vlan_core.c105
-rw-r--r--net/8021q/vlan_dev.c6
-rw-r--r--net/8021q/vlanproc.c6
-rw-r--r--net/9p/client.c11
-rw-r--r--net/appletalk/atalk_proc.c8
-rw-r--r--net/appletalk/ddp.c5
-rw-r--r--net/atm/atm_sysfs.c12
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/atm/proc.c2
-rw-r--r--net/atm/pvc.c5
-rw-r--r--net/atm/svc.c5
-rw-r--r--net/ax25/af_ax25.c10
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.c2
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c28
-rw-r--r--net/batman-adv/bat_iv_ogm.h2
-rw-r--r--net/batman-adv/bat_v.c4
-rw-r--r--net/batman-adv/bat_v.h2
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_elp.h2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bat_v_ogm.h2
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bitarray.h2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c24
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c2
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c158
-rw-r--r--net/batman-adv/distributed-arp-table.h10
-rw-r--r--net/batman-adv/fragmentation.c5
-rw-r--r--net/batman-adv/fragmentation.h2
-rw-r--r--net/batman-adv/gateway_client.c7
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c11
-rw-r--r--net/batman-adv/hard-interface.h2
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h2
-rw-r--r--net/batman-adv/icmp_socket.c3
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/log.c3
-rw-r--r--net/batman-adv/log.h2
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h16
-rw-r--r--net/batman-adv/multicast.c303
-rw-r--r--net/batman-adv/multicast.h20
-rw-r--r--net/batman-adv/netlink.c90
-rw-r--r--net/batman-adv/netlink.h2
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c6
-rw-r--r--net/batman-adv/originator.h6
-rw-r--r--net/batman-adv/routing.c27
-rw-r--r--net/batman-adv/routing.h2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/send.h2
-rw-r--r--net/batman-adv/soft-interface.c10
-rw-r--r--net/batman-adv/soft-interface.h2
-rw-r--r--net/batman-adv/sysfs.c2
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/tp_meter.h2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h13
-rw-r--r--net/bluetooth/hci_conn.c29
-rw-r--r--net/bluetooth/hci_event.c15
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/hci_sock.c4
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/mgmt.c1
-rw-r--r--net/bluetooth/rfcomm/sock.c6
-rw-r--r--net/bluetooth/rfcomm/tty.c4
-rw-r--r--net/bluetooth/sco.c5
-rw-r--r--net/bluetooth/smp.c8
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_if.c35
-rw-r--r--net/bridge/br_netfilter_hooks.c4
-rw-r--r--net/bridge/br_private.h3
-rw-r--r--net/bridge/br_sysfs_br.c2
-rw-r--r--net/bridge/br_sysfs_if.c36
-rw-r--r--net/bridge/br_vlan.c2
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebt_among.c55
-rw-r--r--net/bridge/netfilter/ebt_ip.c58
-rw-r--r--net/bridge/netfilter/ebt_stp.c6
-rw-r--r--net/bridge/netfilter/ebtables.c118
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c79
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/gw.c2
-rw-r--r--net/can/raw.c6
-rw-r--r--net/ceph/Makefile1
-rw-r--r--net/ceph/ceph_common.c10
-rw-r--r--net/ceph/crypto.c6
-rw-r--r--net/ceph/debugfs.c17
-rw-r--r--net/ceph/messenger.c188
-rw-r--r--net/ceph/mon_client.c2
-rw-r--r--net/ceph/osd_client.c67
-rw-r--r--net/ceph/osdmap.c71
-rw-r--r--net/ceph/striper.c261
-rw-r--r--net/compat.c136
-rw-r--r--net/core/dev.c115
-rw-r--r--net/core/dev_addr_lists.c4
-rw-r--r--net/core/dev_ioctl.c7
-rw-r--r--net/core/devlink.c170
-rw-r--r--net/core/dst_cache.c4
-rw-r--r--net/core/ethtool.c83
-rw-r--r--net/core/fib_notifier.c12
-rw-r--r--net/core/fib_rules.c110
-rw-r--r--net/core/filter.c814
-rw-r--r--net/core/flow_dissector.c16
-rw-r--r--net/core/net-procfs.c6
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/net_namespace.c123
-rw-r--r--net/core/pktgen.c15
-rw-r--r--net/core/rtnetlink.c19
-rw-r--r--net/core/skbuff.c99
-rw-r--r--net/core/sock.c111
-rw-r--r--net/core/sock_diag.c12
-rw-r--r--net/core/sysctl_net_core.c13
-rw-r--r--net/core/utils.c23
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/dccp/proto.c5
-rw-r--r--net/decnet/af_decnet.c8
-rw-r--r--net/decnet/dn_dev.c2
-rw-r--r--net/decnet/dn_neigh.c2
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dsa/dsa.c36
-rw-r--r--net/dsa/dsa_priv.h8
-rw-r--r--net/dsa/legacy.c2
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/slave.c61
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h26
-rw-r--r--net/ieee802154/6lowpan/core.c13
-rw-r--r--net/ieee802154/6lowpan/reassembly.c148
-rw-r--r--net/ipv4/Kconfig5
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c76
-rw-r--r--net/ipv4/arp.c4
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/esp4_offload.c2
-rw-r--r--net/ipv4/fib_rules.c19
-rw-r--r--net/ipv4/fib_semantics.c36
-rw-r--r--net/ipv4/fib_trie.c38
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_diag.c3
-rw-r--r--net/ipv4/inet_fragment.c355
-rw-r--r--net/ipv4/inet_timewait_sock.c1
-rw-r--r--net/ipv4/inetpeer.c4
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_fragment.c256
-rw-r--r--net/ipv4/ip_gre.c24
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ip_output.c20
-rw-r--r--net/ipv4/ip_sockglue.c40
-rw-r--r--net/ipv4/ip_tunnel.c103
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipmr.c698
-rw-r--r--net/ipv4/ipmr_base.c365
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/netfilter/Makefile4
-rw-r--r--net/ipv4/netfilter/arp_tables.c33
-rw-r--r--net/ipv4/netfilter/ip_tables.c31
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c17
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c8
-rw-r--r--net/ipv4/netfilter/ipt_ah.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c14
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c58
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c67
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c20
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c6
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c13
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c168
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c34
-rw-r--r--net/ipv4/tcp.c80
-rw-r--r--net/ipv4/tcp_bbr.c38
-rw-r--r--net/ipv4/tcp_illinois.c2
-rw-r--r--net/ipv4/tcp_input.c34
-rw-r--r--net/ipv4/tcp_ipv4.c44
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c55
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c102
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c3
-rw-r--r--net/ipv4/xfrm4_output.c3
-rw-r--r--net/ipv4/xfrm4_policy.c7
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c91
-rw-r--r--net/ipv6/af_inet6.c71
-rw-r--r--net/ipv6/anycast.c14
-rw-r--r--net/ipv6/datagram.c35
-rw-r--r--net/ipv6/esp6_offload.c2
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/fib6_rules.c35
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_fib.c19
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c42
-rw-r--r--net/ipv6/ip6_output.c57
-rw-r--r--net/ipv6/ip6_tunnel.c41
-rw-r--r--net/ipv6/ip6_vti.c45
-rw-r--r--net/ipv6/ip6mr.c1111
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c7
-rw-r--r--net/ipv6/netfilter.c9
-rw-r--r--net/ipv6/netfilter/Kconfig2
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c33
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c8
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c6
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c123
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c65
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c20
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c15
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/proc.c12
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/reassembly.c237
-rw-r--r--net/ipv6/route.c415
-rw-r--r--net/ipv6/seg6_iptunnel.c23
-rw-r--r--net/ipv6/seg6_local.c4
-rw-r--r--net/ipv6/sit.c20
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c27
-rw-r--r--net/ipv6/tcp_ipv6.c29
-rw-r--r--net/ipv6/udp.c103
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c3
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/ipv6/xfrm6_policy.c5
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/iucv/af_iucv.c9
-rw-r--r--net/kcm/kcmproc.c4
-rw-r--r--net/kcm/kcmsock.c34
-rw-r--r--net/l2tp/l2tp_core.c379
-rw-r--r--net/l2tp/l2tp_core.h30
-rw-r--r--net/l2tp/l2tp_ip.c15
-rw-r--r--net/l2tp/l2tp_ip6.c13
-rw-r--r--net/l2tp/l2tp_netlink.c22
-rw-r--r--net/l2tp/l2tp_ppp.c76
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/llc/llc_c_ac.c15
-rw-r--r--net/llc/llc_conn.c32
-rw-r--r--net/llc/llc_proc.c4
-rw-r--r--net/llc/llc_sap.c7
-rw-r--r--net/mac80211/agg-rx.c14
-rw-r--r--net/mac80211/cfg.c13
-rw-r--r--net/mac80211/debugfs.c2
-rw-r--r--net/mac80211/debugfs_sta.c10
-rw-r--r--net/mac80211/ht.c15
-rw-r--r--net/mac80211/ibss.c3
-rw-r--r--net/mac80211/ieee80211_i.h12
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/key.c8
-rw-r--r--net/mac80211/main.c10
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/michael.c2
-rw-r--r--net/mac80211/mlme.c185
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c8
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c8
-rw-r--r--net/mac80211/rx.c231
-rw-r--r--net/mac80211/scan.c4
-rw-r--r--net/mac80211/sta_info.c6
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c11
-rw-r--r--net/mac80211/tx.c65
-rw-r--r--net/mac80211/util.c47
-rw-r--r--net/mac80211/vht.c39
-rw-r--r--net/mac80211/wpa.c8
-rw-r--r--net/mac802154/trace.h8
-rw-r--r--net/mpls/af_mpls.c2
-rw-r--r--net/ncsi/Makefile2
-rw-r--r--net/ncsi/internal.h3
-rw-r--r--net/ncsi/ncsi-manage.c30
-rw-r--r--net/ncsi/ncsi-netlink.c427
-rw-r--r--net/ncsi/ncsi-netlink.h20
-rw-r--r--net/netfilter/Kconfig4
-rw-r--r--net/netfilter/Makefile9
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c4
-rw-r--r--net/netfilter/nf_conncount.c14
-rw-r--r--net/netfilter/nf_conntrack_acct.c6
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_conntrack_ecache.c6
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c92
-rw-r--r--net/netfilter/nf_conntrack_snmp.c7
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c6
-rw-r--r--net/netfilter/nf_log.c2
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/nf_nat_ftp.c7
-rw-r--r--net/netfilter/nf_nat_irc.c7
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c247
-rw-r--r--net/netfilter/nf_tables_inet.c75
-rw-r--r--net/netfilter/nf_tables_netdev.c142
-rw-r--r--net/netfilter/nfnetlink_acct.c3
-rw-r--r--net/netfilter/nfnetlink_cthelper.c25
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c26
-rw-r--r--net/netfilter/nfnetlink_queue.c14
-rw-r--r--net/netfilter/nft_chain_filter.c398
-rw-r--r--net/netfilter/nft_ct.c38
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_lookup.c4
-rw-r--r--net/netfilter/nft_objref.c5
-rw-r--r--net/netfilter/nft_set_hash.c2
-rw-r--r--net/netfilter/x_tables.c221
-rw-r--r--net/netfilter/xt_IDLETIMER.c2
-rw-r--r--net/netfilter/xt_RATEEST.c91
-rw-r--r--net/netfilter/xt_TEE.c73
-rw-r--r--net/netfilter/xt_cluster.c10
-rw-r--r--net/netfilter/xt_connlimit.c4
-rw-r--r--net/netfilter/xt_connmark.c77
-rw-r--r--net/netfilter/xt_hashlimit.c19
-rw-r--r--net/netfilter/xt_limit.c2
-rw-r--r--net/netfilter/xt_nfacct.c2
-rw-r--r--net/netfilter/xt_rateest.c10
-rw-r--r--net/netfilter/xt_recent.c10
-rw-r--r--net/netfilter/xt_string.c1
-rw-r--r--net/netfilter/xt_time.c13
-rw-r--r--net/netlabel/netlabel_unlabeled.c10
-rw-r--r--net/netlink/af_netlink.c10
-rw-r--r--net/netlink/genetlink.c2
-rw-r--r--net/netrom/af_netrom.c15
-rw-r--r--net/nfc/llcp_sock.c5
-rw-r--r--net/openvswitch/datapath.c4
-rw-r--r--net/openvswitch/meter.c12
-rw-r--r--net/openvswitch/vport.c8
-rw-r--r--net/packet/af_packet.c10
-rw-r--r--net/phonet/socket.c5
-rw-r--r--net/qrtr/qrtr.c5
-rw-r--r--net/qrtr/smd.c1
-rw-r--r--net/rds/af_rds.c14
-rw-r--r--net/rds/connection.c7
-rw-r--r--net/rds/ib.c3
-rw-r--r--net/rds/message.c163
-rw-r--r--net/rds/rds.h31
-rw-r--r--net/rds/recv.c42
-rw-r--r--net/rds/send.c69
-rw-r--r--net/rds/tcp.c115
-rw-r--r--net/rds/tcp_listen.c14
-rw-r--r--net/rose/af_rose.c13
-rw-r--r--net/rxrpc/af_rxrpc.c15
-rw-r--r--net/rxrpc/ar-internal.h77
-rw-r--r--net/rxrpc/call_accept.c27
-rw-r--r--net/rxrpc/call_event.c5
-rw-r--r--net/rxrpc/call_object.c32
-rw-r--r--net/rxrpc/conn_client.c3
-rw-r--r--net/rxrpc/conn_event.c6
-rw-r--r--net/rxrpc/conn_object.c10
-rw-r--r--net/rxrpc/conn_service.c1
-rw-r--r--net/rxrpc/input.c29
-rw-r--r--net/rxrpc/local_object.c65
-rw-r--r--net/rxrpc/net_ns.c24
-rw-r--r--net/rxrpc/output.c59
-rw-r--r--net/rxrpc/peer_event.c98
-rw-r--r--net/rxrpc/peer_object.c93
-rw-r--r--net/rxrpc/proc.c6
-rw-r--r--net/rxrpc/protocol.h6
-rw-r--r--net/rxrpc/recvmsg.c2
-rw-r--r--net/rxrpc/rxkad.c2
-rw-r--r--net/rxrpc/security.c3
-rw-r--r--net/rxrpc/sendmsg.c10
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c203
-rw-r--r--net/sched/act_bpf.c24
-rw-r--r--net/sched/act_connmark.c11
-rw-r--r--net/sched/act_csum.c15
-rw-r--r--net/sched/act_gact.c24
-rw-r--r--net/sched/act_ife.c10
-rw-r--r--net/sched/act_ipt.c29
-rw-r--r--net/sched/act_mirred.c25
-rw-r--r--net/sched/act_nat.c11
-rw-r--r--net/sched/act_pedit.c12
-rw-r--r--net/sched/act_police.c13
-rw-r--r--net/sched/act_sample.c13
-rw-r--r--net/sched/act_simple.c12
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_skbmod.c15
-rw-r--r--net/sched/act_tunnel_key.c20
-rw-r--r--net/sched/act_vlan.c17
-rw-r--r--net/sched/cls_api.c5
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/cls_u32.c1
-rw-r--r--net/sched/em_ipt.c257
-rw-r--r--net/sched/sch_api.c7
-rw-r--r--net/sched/sch_generic.c39
-rw-r--r--net/sched/sch_htb.c11
-rw-r--r--net/sched/sch_netem.c2
-rw-r--r--net/sched/sch_prio.c45
-rw-r--r--net/sched/sch_tbf.c3
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/auth.c146
-rw-r--r--net/sctp/chunk.c24
-rw-r--r--net/sctp/diag.c (renamed from net/sctp/sctp_diag.c)31
-rw-r--r--net/sctp/endpointola.c8
-rw-r--r--net/sctp/input.c21
-rw-r--r--net/sctp/inqueue.c2
-rw-r--r--net/sctp/ipv6.c57
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/offload.c2
-rw-r--r--net/sctp/output.c52
-rw-r--r--net/sctp/proc.c90
-rw-r--r--net/sctp/protocol.c104
-rw-r--r--net/sctp/sm_make_chunk.c45
-rw-r--r--net/sctp/sm_sideeffect.c13
-rw-r--r--net/sctp/sm_statefuns.c74
-rw-r--r--net/sctp/socket.c911
-rw-r--r--net/smc/af_smc.c219
-rw-r--r--net/smc/smc.h9
-rw-r--r--net/smc/smc_cdc.c2
-rw-r--r--net/smc/smc_clc.c216
-rw-r--r--net/smc/smc_clc.h22
-rw-r--r--net/smc/smc_close.c25
-rw-r--r--net/smc/smc_core.c103
-rw-r--r--net/smc/smc_core.h16
-rw-r--r--net/smc/smc_ib.c10
-rw-r--r--net/smc/smc_llc.c410
-rw-r--r--net/smc/smc_llc.h41
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/socket.c292
-rw-r--r--net/strparser/strparser.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c3
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c5
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c4
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/cache.c42
-rw-r--r--net/sunrpc/clnt.c6
-rw-r--r--net/sunrpc/debugfs.c6
-rw-r--r--net/sunrpc/rpc_pipe.c42
-rw-r--r--net/sunrpc/svc.c118
-rw-r--r--net/sunrpc/svc_xprt.c34
-rw-r--r--net/sunrpc/svcsock.c21
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c33
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c38
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/tipc/Kconfig8
-rw-r--r--net/tipc/Makefile7
-rw-r--r--net/tipc/addr.c159
-rw-r--r--net/tipc/addr.h47
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/bearer.c160
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/core.c6
-rw-r--r--net/tipc/core.h23
-rw-r--r--net/tipc/diag.c114
-rw-r--r--net/tipc/discover.c392
-rw-r--r--net/tipc/discover.h8
-rw-r--r--net/tipc/group.c3
-rw-r--r--net/tipc/link.c39
-rw-r--r--net/tipc/link.h4
-rw-r--r--net/tipc/msg.c2
-rw-r--r--net/tipc/msg.h23
-rw-r--r--net/tipc/name_distr.c152
-rw-r--r--net/tipc/name_distr.h3
-rw-r--r--net/tipc/name_table.c1164
-rw-r--r--net/tipc/name_table.h66
-rw-r--r--net/tipc/net.c80
-rw-r--r--net/tipc/net.h5
-rw-r--r--net/tipc/node.c129
-rw-r--r--net/tipc/node.h9
-rw-r--r--net/tipc/server.c710
-rw-r--r--net/tipc/socket.c189
-rw-r--r--net/tipc/socket.h10
-rw-r--r--net/tipc/subscr.c363
-rw-r--r--net/tipc/subscr.h68
-rw-r--r--net/tipc/topsrv.c703
-rw-r--r--net/tipc/topsrv.h (renamed from net/tipc/server.h)57
-rw-r--r--net/tipc/udp_media.c14
-rw-r--r--net/tls/Kconfig1
-rw-r--r--net/tls/tls_main.c246
-rw-r--r--net/tls/tls_sw.c713
-rw-r--r--net/unix/af_unix.c20
-rw-r--r--net/vmw_vsock/af_vsock.c4
-rw-r--r--net/wireless/Kconfig13
-rw-r--r--net/wireless/ap.c1
-rw-r--r--net/wireless/chan.c9
-rw-r--r--net/wireless/core.h12
-rw-r--r--net/wireless/ibss.c27
-rw-r--r--net/wireless/mesh.c16
-rw-r--r--net/wireless/mlme.c9
-rw-r--r--net/wireless/nl80211.c408
-rw-r--r--net/wireless/rdev-ops.h30
-rw-r--r--net/wireless/reg.c206
-rw-r--r--net/wireless/sme.c43
-rw-r--r--net/wireless/trace.h72
-rw-r--r--net/wireless/util.c5
-rw-r--r--net/wireless/wext-core.c6
-rw-r--r--net/wireless/wext-proc.c2
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/x25_proc.c12
-rw-r--r--net/x25/x25_subr.c3
-rw-r--r--net/xfrm/xfrm_device.c4
-rw-r--r--net/xfrm/xfrm_input.c9
-rw-r--r--net/xfrm/xfrm_ipcomp.c2
-rw-r--r--net/xfrm/xfrm_output.c5
-rw-r--r--net/xfrm/xfrm_policy.c20
-rw-r--r--net/xfrm/xfrm_proc.c2
-rw-r--r--net/xfrm/xfrm_replay.c2
-rw-r--r--net/xfrm/xfrm_state.c5
-rw-r--r--net/xfrm/xfrm_user.c21
535 files changed, 16417 insertions, 9201 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bad01b14a4ad..5505ee6ebdbe 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -360,6 +360,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
struct vlan_dev_priv *vlan;
bool last = false;
LIST_HEAD(list);
+ int err;
if (is_vlan_dev(dev)) {
int err = __vlan_device_event(dev, event);
@@ -489,6 +490,26 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
vlan_group_for_each_dev(grp, i, vlandev)
call_netdevice_notifiers(event, vlandev);
break;
+
+ case NETDEV_CVLAN_FILTER_PUSH_INFO:
+ err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021Q));
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_CVLAN_FILTER_DROP_INFO:
+ vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021Q));
+ break;
+
+ case NETDEV_SVLAN_FILTER_PUSH_INFO:
+ err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021AD));
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_SVLAN_FILTER_DROP_INFO:
+ vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021AD));
+ break;
}
out:
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index a8ba51030b75..e23aac3e4d37 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -97,6 +97,9 @@ static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
(i) % VLAN_N_VID)))
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto);
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto);
+
/* found in vlan_dev.c */
void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 64aa9f755e1d..4f60e86f4b8d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -48,8 +48,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
* original position later
*/
skb_push(skb, offset);
- skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
- skb->vlan_tci);
+ skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto,
+ skb->vlan_tci, skb->mac_len);
if (!skb)
return false;
skb_pull(skb, offset + VLAN_HLEN);
@@ -165,13 +165,12 @@ struct vlan_vid_info {
int refcount;
};
-static bool vlan_hw_filter_capable(const struct net_device *dev,
- const struct vlan_vid_info *vid_info)
+static bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto)
{
- if (vid_info->proto == htons(ETH_P_8021Q) &&
+ if (proto == htons(ETH_P_8021Q) &&
dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
return true;
- if (vid_info->proto == htons(ETH_P_8021AD) &&
+ if (proto == htons(ETH_P_8021AD) &&
dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
return true;
return false;
@@ -202,11 +201,73 @@ static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
return vid_info;
}
+static int vlan_add_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+ if (!vlan_hw_filter_capable(dev, proto))
+ return 0;
+
+ if (netif_device_present(dev))
+ return dev->netdev_ops->ndo_vlan_rx_add_vid(dev, proto, vid);
+ else
+ return -ENODEV;
+}
+
+static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+ if (!vlan_hw_filter_capable(dev, proto))
+ return 0;
+
+ if (netif_device_present(dev))
+ return dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
+ else
+ return -ENODEV;
+}
+
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+ struct net_device *real_dev = vlan_info->real_dev;
+ struct vlan_vid_info *vlan_vid_info;
+ int err;
+
+ list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) {
+ if (vlan_vid_info->proto == proto) {
+ err = vlan_add_rx_filter_info(real_dev, proto,
+ vlan_vid_info->vid);
+ if (err)
+ goto unwind;
+ }
+ }
+
+ return 0;
+
+unwind:
+ list_for_each_entry_continue_reverse(vlan_vid_info,
+ &vlan_info->vid_list, list) {
+ if (vlan_vid_info->proto == proto)
+ vlan_kill_rx_filter_info(real_dev, proto,
+ vlan_vid_info->vid);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(vlan_filter_push_vids);
+
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+ struct vlan_vid_info *vlan_vid_info;
+
+ list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list)
+ if (vlan_vid_info->proto == proto)
+ vlan_kill_rx_filter_info(vlan_info->real_dev,
+ vlan_vid_info->proto,
+ vlan_vid_info->vid);
+}
+EXPORT_SYMBOL(vlan_filter_drop_vids);
+
static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
struct vlan_vid_info **pvid_info)
{
struct net_device *dev = vlan_info->real_dev;
- const struct net_device_ops *ops = dev->netdev_ops;
struct vlan_vid_info *vid_info;
int err;
@@ -214,16 +275,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
if (!vid_info)
return -ENOMEM;
- if (vlan_hw_filter_capable(dev, vid_info)) {
- if (netif_device_present(dev))
- err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
- else
- err = -ENODEV;
- if (err) {
- kfree(vid_info);
- return err;
- }
+ err = vlan_add_rx_filter_info(dev, proto, vid);
+ if (err) {
+ kfree(vid_info);
+ return err;
}
+
list_add(&vid_info->list, &vlan_info->vid_list);
vlan_info->nr_vids++;
*pvid_info = vid_info;
@@ -270,21 +327,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
struct vlan_vid_info *vid_info)
{
struct net_device *dev = vlan_info->real_dev;
- const struct net_device_ops *ops = dev->netdev_ops;
__be16 proto = vid_info->proto;
u16 vid = vid_info->vid;
int err;
- if (vlan_hw_filter_capable(dev, vid_info)) {
- if (netif_device_present(dev))
- err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
- else
- err = -ENODEV;
- if (err) {
- pr_warn("failed to kill vid %04x/%d for device %s\n",
- proto, vid, dev->name);
- }
- }
+ err = vlan_kill_rx_filter_info(dev, proto, vid);
+ if (err)
+ pr_warn("failed to kill vid %04x/%d for device %s\n",
+ proto, vid, dev->name);
+
list_del(&vid_info->list);
kfree(vid_info);
vlan_info->nr_vids--;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f7e83f6d2e64..236452ebbd9e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -29,6 +29,7 @@
#include <linux/net_tstamp.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/phy.h>
#include <net/arp.h>
#include <net/switchdev.h>
@@ -665,8 +666,11 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
+ struct phy_device *phydev = vlan->real_dev->phydev;
- if (ops->get_ts_info) {
+ if (phydev && phydev->drv && phydev->drv->ts_info) {
+ return phydev->drv->ts_info(phydev, info);
+ } else if (ops->get_ts_info) {
return ops->get_ts_info(vlan->real_dev, info);
} else {
info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index a662ccc166df..a627a5db2125 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -148,8 +148,8 @@ int __net_init vlan_proc_init(struct net *net)
if (!vn->proc_vlan_dir)
goto err;
- vn->proc_vlan_conf = proc_create(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
- vn->proc_vlan_dir, &vlan_fops);
+ vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600,
+ vn->proc_vlan_dir, &vlan_fops);
if (!vn->proc_vlan_conf)
goto err;
return 0;
@@ -172,7 +172,7 @@ int vlan_proc_add_dev(struct net_device *vlandev)
if (!strcmp(vlandev->name, name_conf))
return -EINVAL;
vlan->dent =
- proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
+ proc_create_data(vlandev->name, S_IFREG | 0600,
vn->proc_vlan_dir, &vlandev_fops, vlandev);
if (!vlan->dent)
return -ENOBUFS;
diff --git a/net/9p/client.c b/net/9p/client.c
index b433aff5ff13..21e6df1cc70f 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -190,7 +190,9 @@ static int parse_opts(char *opts, struct p9_client *clnt)
p9_debug(P9_DEBUG_ERROR,
"problem allocating copy of trans arg\n");
goto free_and_return;
- }
+ }
+
+ v9fs_put_trans(clnt->trans_mod);
clnt->trans_mod = v9fs_get_trans_by_name(s);
if (clnt->trans_mod == NULL) {
pr_info("Could not find request transport: %s\n",
@@ -226,6 +228,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
}
free_and_return:
+ v9fs_put_trans(clnt->trans_mod);
kfree(tmp_options);
return ret;
}
@@ -769,7 +772,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
if (err < 0) {
if (err != -ERESTARTSYS && err != -EFAULT)
c->status = Disconnected;
- goto reterr;
+ goto recalc_sigpending;
}
again:
/* Wait for the response */
@@ -804,6 +807,7 @@ again:
if (req->status == REQ_STATUS_RCVD)
err = 0;
}
+recalc_sigpending:
if (sigpending) {
spin_lock_irqsave(&current->sighand->siglock, flags);
recalc_sigpending();
@@ -867,7 +871,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
if (err == -EIO)
c->status = Disconnected;
if (err != -ERESTARTSYS)
- goto reterr;
+ goto recalc_sigpending;
}
if (req->status == REQ_STATUS_ERROR) {
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
@@ -885,6 +889,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
if (req->status == REQ_STATUS_RCVD)
err = 0;
}
+recalc_sigpending:
if (sigpending) {
spin_lock_irqsave(&current->sighand->siglock, flags);
recalc_sigpending();
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index a3bf9d519193..7214aea14cb3 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -257,22 +257,22 @@ int __init atalk_proc_init(void)
if (!atalk_proc_dir)
goto out;
- p = proc_create("interface", S_IRUGO, atalk_proc_dir,
+ p = proc_create("interface", 0444, atalk_proc_dir,
&atalk_seq_interface_fops);
if (!p)
goto out_interface;
- p = proc_create("route", S_IRUGO, atalk_proc_dir,
+ p = proc_create("route", 0444, atalk_proc_dir,
&atalk_seq_route_fops);
if (!p)
goto out_route;
- p = proc_create("socket", S_IRUGO, atalk_proc_dir,
+ p = proc_create("socket", 0444, atalk_proc_dir,
&atalk_seq_socket_fops);
if (!p)
goto out_socket;
- p = proc_create("arp", S_IRUGO, atalk_proc_dir, &atalk_seq_arp_fops);
+ p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops);
if (!p)
goto out_arp;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 03a9fc0771c0..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1238,7 +1238,7 @@ out:
* fields into the sockaddr.
*/
static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_at sat;
struct sock *sk = sock->sk;
@@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
if (atalk_autobind(sk) < 0)
goto out;
- *uaddr_len = sizeof(struct sockaddr_at);
memset(&sat, 0, sizeof(sat));
if (peer) {
@@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
sat.sat_port = at->src_port;
}
- err = 0;
sat.sat_family = AF_APPLETALK;
memcpy(uaddr, &sat, sizeof(sat));
+ err = sizeof(struct sockaddr_at);
out:
release_sock(sk);
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5d2fed9f5710..39b94ca5f65d 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -96,12 +96,12 @@ static ssize_t show_link_rate(struct device *cdev,
return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
}
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
-static DEVICE_ATTR(atmindex, S_IRUGO, show_atmindex, NULL);
-static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
-static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(atmaddress, 0444, show_atmaddress, NULL);
+static DEVICE_ATTR(atmindex, 0444, show_atmindex, NULL);
+static DEVICE_ATTR(carrier, 0444, show_carrier, NULL);
+static DEVICE_ATTR(type, 0444, show_type, NULL);
+static DEVICE_ATTR(link_rate, 0444, show_link_rate, NULL);
static struct device_attribute *atm_attrs[] = {
&dev_attr_atmaddress,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d4f6029d5109..f07dbc632222 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -893,7 +893,7 @@ static int __init atm_clip_init(void)
{
struct proc_dir_entry *p;
- p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops);
+ p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/arp\n");
atm_clip_exit_noproc();
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09a1f056712a..01d5d20a6eb1 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1042,7 +1042,7 @@ static int __init lane_module_init(void)
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *p;
- p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops);
+ p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/lec\n");
return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index edc48edc95c1..55410c00c7e2 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -474,7 +474,7 @@ int __init atm_proc_init(void)
for (e = atm_proc_ents; e->name; e++) {
struct proc_dir_entry *dirent;
- dirent = proc_create(e->name, S_IRUGO,
+ dirent = proc_create(e->name, 0444,
atm_proc_root, e->proc_fops);
if (!dirent)
goto err_out_remove;
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index e1140b3bdcaa..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname,
}
static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmpvc *addr;
struct atm_vcc *vcc = ATM_SD(sock);
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
- *sockaddr_len = sizeof(struct sockaddr_atmpvc);
addr = (struct sockaddr_atmpvc *)sockaddr;
memset(addr, 0, sizeof(*addr));
addr->sap_family = AF_ATMPVC;
addr->sap_addr.itf = vcc->dev->number;
addr->sap_addr.vpi = vcc->vpi;
addr->sap_addr.vci = vcc->vci;
- return 0;
+ return sizeof(struct sockaddr_atmpvc);
}
static const struct proto_ops pvc_proto_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index c458adcbc177..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -419,15 +419,14 @@ out:
}
static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmsvc *addr;
- *sockaddr_len = sizeof(struct sockaddr_atmsvc);
addr = (struct sockaddr_atmsvc *) sockaddr;
memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
sizeof(struct sockaddr_atmsvc));
- return 0;
+ return sizeof(struct sockaddr_atmsvc);
}
int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 47fdd399626b..2b41366fcad2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1388,7 +1388,7 @@ out:
}
static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
@@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
fsa->fsa_digipeater[0] = null_ax25_address;
}
}
- *uaddr_len = sizeof (struct full_sockaddr_ax25);
+ err = sizeof (struct full_sockaddr_ax25);
out:
release_sock(sk);
@@ -1989,10 +1989,10 @@ static int __init ax25_init(void)
dev_add_pack(&ax25_packet_type);
register_netdevice_notifier(&ax25_dev_notifier);
- proc_create("ax25_route", S_IRUGO, init_net.proc_net,
+ proc_create("ax25_route", 0444, init_net.proc_net,
&ax25_route_fops);
- proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops);
- proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops);
+ proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops);
+ proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops);
out:
return rc;
}
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c44f6515be5e..e4e2e02b7380 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 022f6e77307b..b97ba6fb8353 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 80c72c7d3cad..ea309ad06175 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 029221615ba3..534b790c3753 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 79e326383726..be09a9883825 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -157,7 +157,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
* Return: 0 on success, a negative error code otherwise.
*/
static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
- int max_if_num)
+ unsigned int max_if_num)
{
void *data_ptr;
size_t old_size;
@@ -201,7 +201,8 @@ unlock:
*/
static void
batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
+ unsigned int max_if_num,
+ unsigned int del_if_num)
{
size_t chunk_size;
size_t if_offset;
@@ -239,7 +240,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
*/
static void
batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
+ unsigned int max_if_num,
+ unsigned int del_if_num)
{
size_t if_offset;
void *data_ptr;
@@ -276,7 +278,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
* Return: 0 on success, a negative error code otherwise.
*/
static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
+ unsigned int max_if_num,
+ unsigned int del_if_num)
{
spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
@@ -311,7 +314,8 @@ static struct batadv_orig_node *
batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
{
struct batadv_orig_node *orig_node;
- int size, hash_added;
+ int hash_added;
+ size_t size;
orig_node = batadv_orig_hash_find(bat_priv, addr);
if (orig_node)
@@ -893,7 +897,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
u32 i;
size_t word_index;
u8 *w;
- int if_num;
+ unsigned int if_num;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1023,7 +1027,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
struct batadv_neigh_node *tmp_neigh_node = NULL;
struct batadv_neigh_node *router = NULL;
struct batadv_orig_node *orig_node_tmp;
- int if_num;
+ unsigned int if_num;
u8 sum_orig, sum_neigh;
u8 *neigh_addr;
u8 tq_avg;
@@ -1182,7 +1186,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
u8 total_count;
u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
- int if_num;
+ unsigned int if_num;
unsigned int tq_asym_penalty, inv_asym_penalty;
unsigned int combined_tq;
unsigned int tq_iface_penalty;
@@ -1702,9 +1706,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
if (is_my_orig) {
unsigned long *word;
- int offset;
+ size_t offset;
s32 bit_pos;
- s16 if_num;
+ unsigned int if_num;
u8 *weight;
orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
@@ -2729,7 +2733,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
struct batadv_gw_node *curr_gw;
- int ret = -EINVAL;
+ int ret = 0;
void *hdr;
router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 9dc0dd5c83df..317cafd302cf 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 27e165ac9302..ec93337ee259 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
@@ -928,7 +928,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
struct batadv_gw_node *curr_gw;
- int ret = -EINVAL;
+ int ret = 0;
void *hdr;
router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index a17ab68bbce8..ec4a2a569750 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index a83478c46597..28687493599f 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 5e39d0588a48..e8c7b7fd290d 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index ba59b77c605d..2948b41b06d4 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 6a4c14ccc3c6..ed36c5e79fde 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bdc1ef06e05b..a296a4d851f5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ca9d0753dd6b..48f683289531 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index fad47853ad3c..a2de5a44bd41 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
@@ -2161,22 +2161,25 @@ batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_bla_claim *claim;
int idx = 0;
+ int ret = 0;
rcu_read_lock();
hlist_for_each_entry_rcu(claim, head, hash_entry) {
if (idx++ < *idx_skip)
continue;
- if (batadv_bla_claim_dump_entry(msg, portid, seq,
- primary_if, claim)) {
+
+ ret = batadv_bla_claim_dump_entry(msg, portid, seq,
+ primary_if, claim);
+ if (ret) {
*idx_skip = idx - 1;
goto unlock;
}
}
- *idx_skip = idx;
+ *idx_skip = 0;
unlock:
rcu_read_unlock();
- return 0;
+ return ret;
}
/**
@@ -2391,22 +2394,25 @@ batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_bla_backbone_gw *backbone_gw;
int idx = 0;
+ int ret = 0;
rcu_read_lock();
hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
if (idx++ < *idx_skip)
continue;
- if (batadv_bla_backbone_dump_entry(msg, portid, seq,
- primary_if, backbone_gw)) {
+
+ ret = batadv_bla_backbone_dump_entry(msg, portid, seq,
+ primary_if, backbone_gw);
+ if (ret) {
*idx_skip = idx - 1;
goto unlock;
}
}
- *idx_skip = idx;
+ *idx_skip = 0;
unlock:
rcu_read_unlock();
- return 0;
+ return ret;
}
/**
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index b27571abcd2f..71f95a3e4d3f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 21d1189957a7..4229b01ac7b5 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 90a08d35c501..37b069698b04 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 9703c791ffc5..a60bacf7120b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -43,13 +44,19 @@
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "send.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -393,7 +400,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
batadv_arp_hw_src(skb, hdr_size), &ip_src,
batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
- if (hdr_size == 0)
+ if (hdr_size < sizeof(struct batadv_unicast_packet))
return;
unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
@@ -495,7 +502,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
* the one with the lowest address
*/
if (tmp_max == max && max_orig_node &&
- batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
+ batadv_compare_eth(candidate->orig, max_orig_node->orig))
goto out;
ret = true;
@@ -852,6 +859,151 @@ out:
#endif
/**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ * netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_dat_entry *dat_entry)
+{
+ int msecs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+ if (!hdr)
+ return -ENOBUFS;
+
+ msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+ if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+ dat_entry->ip) ||
+ nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+ dat_entry->mac_addr) ||
+ nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ * a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, int *idx_skip)
+{
+ struct batadv_dat_entry *dat_entry;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_dat_cache_dump_entry(msg, portid, seq,
+ dat_entry)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_hashtable *hash;
+ struct batadv_priv *bat_priv;
+ int bucket = cb->args[0];
+ struct hlist_head *head;
+ int idx = cb->args[1];
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ hash = bat_priv->dat.hash;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_dat_cache_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq, head,
+ &idx))
+ break;
+
+ bucket++;
+ idx = 0;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+
+ ret = msg->len;
+
+out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
+
+/**
* batadv_arp_get_type() - parse an ARP packet and gets the type
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to analyse
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 12897eb46268..a04596028337 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -28,6 +28,7 @@
#include "originator.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
int batadv_dat_init(struct batadv_priv *bat_priv);
void batadv_dat_free(struct batadv_priv *bat_priv);
int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb);
/**
* batadv_dat_inc_counter() - increment the correct DAT packet counter
@@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
{
}
+static inline int
+batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
u8 subtype)
{
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 22dde42fd80e..0fddc17106bd 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
@@ -288,7 +288,8 @@ batadv_frag_merge_packets(struct hlist_head *chain)
/* Move the existing MAC header to just before the payload. (Override
* the fragment header.)
*/
- skb_pull_rcsum(skb_out, hdr_size);
+ skb_pull(skb_out, hdr_size);
+ skb_out->ip_summed = CHECKSUM_NONE;
memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
skb_set_mac_header(skb_out, -ETH_HLEN);
skb_reset_network_header(skb_out);
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 138b22a1836a..944512e07782 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 37fe9a644f22..8b198ee798c9 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -746,7 +746,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
{
struct batadv_neigh_node *neigh_curr = NULL;
struct batadv_neigh_node *neigh_old = NULL;
- struct batadv_orig_node *orig_dst_node;
+ struct batadv_orig_node *orig_dst_node = NULL;
struct batadv_gw_node *gw_node = NULL;
struct batadv_gw_node *curr_gw = NULL;
struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo;
@@ -757,6 +757,9 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
vid = batadv_get_vid(skb, 0);
+ if (is_multicast_ether_addr(ethhdr->h_dest))
+ goto out;
+
orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
ethhdr->h_dest, vid);
if (!orig_dst_node)
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 981f58421a32..f0b86fcb2493 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b3e156af2256..936c107f3199 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index afebd9c7edf4..80afb2793687 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 5f186bff284a..c405d15befd6 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -763,6 +763,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
+ if (bat_priv->num_ifaces >= UINT_MAX) {
+ ret = -ENOSPC;
+ goto err_dev;
+ }
+
ret = netdev_master_upper_dev_link(hard_iface->net_dev,
soft_iface, NULL, NULL, NULL);
if (ret)
@@ -876,7 +881,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface);
/* nobody uses this interface anymore */
- if (!bat_priv->num_ifaces) {
+ if (bat_priv->num_ifaces == 0) {
batadv_gw_check_client_stop(bat_priv);
if (autodel == BATADV_IF_CLEANUP_AUTO)
@@ -912,7 +917,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
if (ret)
goto free_if;
- hard_iface->if_num = -1;
+ hard_iface->if_num = 0;
hard_iface->net_dev = net_dev;
hard_iface->soft_iface = NULL;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index de5e9a374ece..d1c0f6189301 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 04d964358c98..7b49e4001778 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 4ce1b6d3ad5c..9490a7ca2ba6 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index e91f29c7c638..55c358ad3331 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -24,6 +24,7 @@
#include <linux/debugfs.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
+#include <linux/eventpoll.h>
#include <linux/export.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 84cddd01eeab..958be22beda9 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index dc9fa37ddd14..853773e45f79 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -22,6 +22,7 @@
#include <linux/compiler.h>
#include <linux/debugfs.h>
#include <linux/errno.h>
+#include <linux/eventpoll.h>
#include <linux/export.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 35e02b2b9e72..35f4f397ed57 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d31c8266e244..69c0d85bceb3 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index f7ba3f96d8f3..057a28a9fe88 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -25,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.0"
+#define BATADV_SOURCE_VERSION "2018.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -331,11 +331,13 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
*
* Return: true when x is a predecessor of y, false otherwise
*/
-#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
- typeof(y)_d2 = (y); \
- typeof(x)_dummy = (_d1 - _d2); \
- (void)(&_d1 == &_d2); \
- _dummy > batadv_smallest_signed_int(_dummy); })
+#define batadv_seq_before(x, y) ({ \
+ typeof(x)_d1 = (x); \
+ typeof(y)_d2 = (y); \
+ typeof(x)_dummy = (_d1 - _d2); \
+ (void)(&_d1 == &_d2); \
+ _dummy > batadv_smallest_signed_int(_dummy); \
+})
/**
* batadv_seq_after() - Checks if a sequence number x is a successor of y
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index cbdeb47ec3f6..a11d3d89f012 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -40,6 +40,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -52,14 +53,20 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/addrconf.h>
+#include <net/genetlink.h>
#include <net/if_inet6.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -102,7 +109,36 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
}
/**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+ static const u8 prefix[] = {0x01, 0x00, 0x5E};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+ static const u8 prefix[] = {0x33, 0x33};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
* batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -119,9 +155,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct net_device *bridge = batadv_mcast_get_bridge(dev);
struct netdev_hw_addr *mc_list_entry;
struct batadv_hw_addr *new;
@@ -129,6 +168,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
netif_addr_lock_bh(bridge ? bridge : dev);
netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+ if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+ continue;
+
+ if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+ continue;
+
new = kmalloc(sizeof(*new), GFP_ATOMIC);
if (!new) {
ret = -ENOMEM;
@@ -193,6 +238,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
/**
* batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: a bridge slave whose bridge to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -204,10 +250,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct br_ip_list *br_ip_entry, *tmp;
struct batadv_hw_addr *new;
u8 mcast_addr[ETH_ALEN];
@@ -221,6 +270,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
goto out;
list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+ if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+ continue;
+
+ if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+ continue;
+
batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
continue;
@@ -543,8 +598,8 @@ update:
bat_priv->mcast.enabled = true;
}
- return !(mcast_data.flags &
- (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
+ return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
+ mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
}
/**
@@ -568,11 +623,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
if (!batadv_mcast_mla_tvlv_update(bat_priv))
goto update;
- ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
- ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
@@ -814,8 +869,8 @@ static struct batadv_orig_node *
batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
struct ethhdr *ethhdr)
{
- return batadv_transtable_search(bat_priv, ethhdr->h_source,
- ethhdr->h_dest, BATADV_NO_FLAGS);
+ return batadv_transtable_search(bat_priv, NULL, ethhdr->h_dest,
+ BATADV_NO_FLAGS);
}
/**
@@ -1286,6 +1341,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
#endif
/**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 or error code.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv)
+{
+ u32 flags = bat_priv->mcast.flags;
+ u32 flags_priv = BATADV_NO_FLAGS;
+
+ if (bat_priv->mcast.bridged) {
+ flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+ if (bat_priv->mcast.querier_ipv4.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+ if (bat_priv->mcast.querier_ipv6.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+ if (bat_priv->mcast.querier_ipv4.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+ if (bat_priv->mcast.querier_ipv6.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) ||
+ nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_orig_node *orig_node)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ orig_node->orig)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capabilities)) {
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS,
+ orig_node->mcast_flags)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ * table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, long *idx_skip)
+{
+ struct batadv_orig_node *orig_node;
+ long idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capa_initialized))
+ continue;
+
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_mcast_flags_dump_entry(msg, portid, seq,
+ orig_node)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: current bucket to dump
+ * @idx: index in current bucket to the next entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ long bucket_tmp = *bucket;
+ struct hlist_head *head;
+ long idx_tmp = *idx;
+
+ while (bucket_tmp < hash->size) {
+ head = &hash->table[bucket_tmp];
+
+ if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
+ &idx_tmp))
+ break;
+
+ bucket_tmp++;
+ idx_tmp = 0;
+ }
+
+ *bucket = bucket_tmp;
+ *idx = idx_tmp;
+
+ return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ * callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+ struct batadv_hard_iface **primary_if)
+{
+ struct batadv_hard_iface *hard_iface = NULL;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ hard_iface = batadv_primary_if_get_selected(bat_priv);
+ if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (!ret && primary_if)
+ *primary_if = hard_iface;
+ else
+ batadv_hardif_put(hard_iface);
+
+ return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct batadv_priv *bat_priv;
+ long *bucket = &cb->args[0];
+ long *idx = &cb->args[1];
+ int ret;
+
+ ret = batadv_mcast_netlink_get_primary(cb, &primary_if);
+ if (ret)
+ return ret;
+
+ bat_priv = netdev_priv(primary_if->soft_iface);
+ ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+ bat_priv, bucket, idx);
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+
+/**
* batadv_mcast_free() - free the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
*/
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3ac06337ab71..3b04ab13f0eb 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -21,6 +21,7 @@
#include "main.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv);
int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv);
+
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb);
+
void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
@@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
return 0;
}
+static inline int
+batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline int batadv_mcast_flags_dump(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
{
}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a823d3899bad..0d9459b69bdb 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
@@ -45,8 +45,10 @@
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "hard-interface.h"
+#include "multicast.h"
#include "originator.h"
#include "soft-interface.h"
#include "tp_meter.h"
@@ -64,39 +66,44 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
};
static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
- [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
- [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
- [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
- [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
- [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
- [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
- [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
- [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
- [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
- [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
- [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TQ] = { .type = NLA_U8 },
- [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
- [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
- [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
+ [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
+ [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
+ [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
+ [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
+ [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
+ [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
+ [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
+ [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
+ [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TQ] = { .type = NLA_U8 },
+ [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
+ [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
+ [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = { .type = NLA_U32 },
+ [BATADV_ATTR_DAT_CACHE_HWADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 },
};
/**
@@ -147,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
goto out;
#endif
+ if (batadv_mcast_mesh_info_put(msg, bat_priv))
+ goto out;
+
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
hard_iface = primary_if->net_dev;
@@ -604,6 +614,18 @@ static const struct genl_ops batadv_netlink_ops[] = {
.policy = batadv_netlink_policy,
.dumpit = batadv_bla_backbone_dump,
},
+ {
+ .cmd = BATADV_CMD_GET_DAT_CACHE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_dat_cache_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_MCAST_FLAGS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_mcast_flags_dump,
+ },
};
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 0e7e57b69b54..571d9a5ae7aa 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b48116bb24ef..c3578444f3cb 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index adaeafa4f71e..65c346812bc1 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 58a7d9274435..716e5b43acfa 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -1569,7 +1569,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
* Return: 0 on success or negative error number in case of failure
*/
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
- int max_if_num)
+ unsigned int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_algo_ops *bao = bat_priv->algo_ops;
@@ -1611,7 +1611,7 @@ err:
* Return: 0 on success or negative error number in case of failure
*/
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
- int max_if_num)
+ unsigned int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_hashtable *hash = bat_priv->orig_hash;
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 8e543a3cdc6c..3b3f59b881e1 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -73,9 +73,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
- int max_if_num);
+ unsigned int max_if_num);
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
- int max_if_num);
+ unsigned int max_if_num);
struct batadv_orig_node_vlan *
batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
unsigned short vid);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index b6891e8b741c..cc3ed93a6d51 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -759,6 +759,7 @@ free_skb:
/**
* batadv_reroute_unicast_packet() - update the unicast header for re-routing
* @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast packet to process
* @unicast_packet: the unicast header to be updated
* @dst_addr: the payload destination
* @vid: VLAN identifier
@@ -770,7 +771,7 @@ free_skb:
* Return: true if the packet header has been updated, false otherwise
*/
static bool
-batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_unicast_packet *unicast_packet,
u8 *dst_addr, unsigned short vid)
{
@@ -799,8 +800,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
}
/* update the packet header */
+ skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ether_addr_copy(unicast_packet->dest, orig_addr);
unicast_packet->ttvn = orig_ttvn;
+ skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ret = true;
out:
@@ -841,7 +844,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* the packet to
*/
if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
- if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
ethhdr->h_dest, vid))
batadv_dbg_ratelimited(BATADV_DBG_TT,
bat_priv,
@@ -887,7 +890,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* destination can possibly be updated and forwarded towards the new
* target host
*/
- if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
ethhdr->h_dest, vid)) {
batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
"Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
@@ -910,12 +913,14 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
if (!primary_if)
return false;
+ /* update the packet header */
+ skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
+ unicast_packet->ttvn = curr_ttvn;
+ skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
batadv_hardif_put(primary_if);
- unicast_packet->ttvn = curr_ttvn;
-
return true;
}
@@ -968,14 +973,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
int check, hdr_size = sizeof(*unicast_packet);
enum batadv_subtype subtype;
- struct ethhdr *ethhdr;
int ret = NET_RX_DROP;
bool is4addr, is_gw;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
- unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
- ethhdr = eth_hdr(skb);
-
is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
/* the caller function should have already pulled 2 bytes */
if (is4addr)
@@ -995,12 +996,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
goto free_skb;
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
/* If this is a unicast packet from another backgone gw,
* drop it.
*/
- orig_addr_gw = ethhdr->h_source;
+ orig_addr_gw = eth_hdr(skb)->h_source;
orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
if (orig_node_gw) {
is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
@@ -1015,6 +1018,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
}
if (is4addr) {
+ unicast_4addr_packet =
+ (struct batadv_unicast_4addr_packet *)skb->data;
subtype = unicast_4addr_packet->subtype;
batadv_dat_inc_counter(bat_priv, subtype);
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index a1289bc5f115..db54c2d9b8bf 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 2a5ab6f1076d..4a35f5c2f52b 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 1e8c79093623..64cce07b8fe6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 900c5ce21cd4..edeffcb9f3a2 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -459,13 +459,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
/* skb->dev & skb->pkt_type are set here */
skb->protocol = eth_type_trans(skb, soft_iface);
-
- /* should not be necessary anymore as we use skb_pull_rcsum()
- * TODO: please verify this and remove this TODO
- * -- Dec 21st 2009, Simon Wunderlich
- */
-
- /* skb->ip_summed = CHECKSUM_UNNECESSARY; */
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
batadv_inc_counter(bat_priv, BATADV_CNT_RX);
batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 075c5b5b2ce1..daf87f07fadd 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index c1578fa0b952..f2eef43bd2ec 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index bbeee61221fa..c1e3fb69952d 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8b576712d0c1..11520de96ccb 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index c8b8f2cb2c2b..68e600974759 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7550a9ccd695..0225616d5771 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 8d9e3abec2c8..01b6c8eafaf9 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 5ffcb45ac6ff..a637458205d1 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index a74df33f446d..ef5867f49824 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index bb1578410e0c..476b052ad982 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -167,7 +167,7 @@ struct batadv_hard_iface {
struct list_head list;
/** @if_num: identificator of the interface */
- s16 if_num;
+ unsigned int if_num;
/** @if_status: status of the interface for batman-adv */
char if_status;
@@ -1596,7 +1596,7 @@ struct batadv_priv {
atomic_t batman_queue_left;
/** @num_ifaces: number of interfaces assigned to this mesh interface */
- char num_ifaces;
+ unsigned int num_ifaces;
/** @mesh_obj: kobject for sysfs mesh subdirectory */
struct kobject *mesh_obj;
@@ -2186,15 +2186,16 @@ struct batadv_algo_orig_ops {
* orig_node due to a new hard-interface being added into the mesh
* (optional)
*/
- int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+ int (*add_if)(struct batadv_orig_node *orig_node,
+ unsigned int max_if_num);
/**
* @del_if: ask the routing algorithm to apply the needed changes to the
* orig_node due to an hard-interface being removed from the mesh
* (optional)
*/
- int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
- int del_if_num);
+ int (*del_if)(struct batadv_orig_node *orig_node,
+ unsigned int max_if_num, unsigned int del_if_num);
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/** @print: print the originator table (optional) */
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a9682534c377..45ff5dc124cc 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -749,18 +749,31 @@ static bool conn_use_rpa(struct hci_conn *conn)
}
static void hci_req_add_le_create_conn(struct hci_request *req,
- struct hci_conn *conn)
+ struct hci_conn *conn,
+ bdaddr_t *direct_rpa)
{
struct hci_cp_le_create_conn cp;
struct hci_dev *hdev = conn->hdev;
u8 own_addr_type;
- /* Update random address, but set require_privacy to false so
- * that we never connect with an non-resolvable address.
+ /* If direct address was provided we use it instead of current
+ * address.
*/
- if (hci_update_random_address(req, false, conn_use_rpa(conn),
- &own_addr_type))
- return;
+ if (direct_rpa) {
+ if (bacmp(&req->hdev->random_addr, direct_rpa))
+ hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
+ direct_rpa);
+
+ /* direct address is always RPA */
+ own_addr_type = ADDR_LE_DEV_RANDOM;
+ } else {
+ /* Update random address, but set require_privacy to false so
+ * that we never connect with an non-resolvable address.
+ */
+ if (hci_update_random_address(req, false, conn_use_rpa(conn),
+ &own_addr_type))
+ return;
+ }
memset(&cp, 0, sizeof(cp));
@@ -825,7 +838,7 @@ static void hci_req_directed_advertising(struct hci_request *req,
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, u8 sec_level, u16 conn_timeout,
- u8 role)
+ u8 role, bdaddr_t *direct_rpa)
{
struct hci_conn_params *params;
struct hci_conn *conn;
@@ -940,7 +953,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED);
}
- hci_req_add_le_create_conn(&req, conn);
+ hci_req_add_le_create_conn(&req, conn, direct_rpa);
create_conn:
err = hci_req_run(&req, create_le_conn_complete);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cd3bbb766c24..139707cd9d35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4648,7 +4648,8 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev,
/* This function requires the caller holds hdev->lock */
static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
bdaddr_t *addr,
- u8 addr_type, u8 adv_type)
+ u8 addr_type, u8 adv_type,
+ bdaddr_t *direct_rpa)
{
struct hci_conn *conn;
struct hci_conn_params *params;
@@ -4699,7 +4700,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
}
conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
- HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER);
+ HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER,
+ direct_rpa);
if (!IS_ERR(conn)) {
/* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned
* by higher layer that tried to connect, if no then
@@ -4808,8 +4810,13 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
bdaddr_type = irk->addr_type;
}
- /* Check if we have been requested to connect to this device */
- conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type);
+ /* Check if we have been requested to connect to this device.
+ *
+ * direct_addr is set only for directed advertising reports (it is NULL
+ * for advertising reports) and is already verified to be RPA above.
+ */
+ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
+ direct_addr);
if (conn && type == LE_ADV_IND) {
/* Store report for later inclusion by
* mgmt_device_connected
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3394e6791673..66c0781773df 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Slave connection state and connectable mode bit 38
* and scannable bit 21.
*/
- if (connectable && (!(hdev->le_states[4] & 0x01) ||
- !(hdev->le_states[2] & 0x40)))
+ if (connectable && (!(hdev->le_states[4] & 0x40) ||
+ !(hdev->le_states[2] & 0x20)))
return false;
}
@@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Master connection state and connectable mode bit 35 and
* scannable 19.
*/
- if (connectable && (!(hdev->le_states[4] & 0x10) ||
+ if (connectable && (!(hdev->le_states[4] & 0x08) ||
!(hdev->le_states[2] & 0x08)))
return false;
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 923e9a271872..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1340,7 +1340,7 @@ done:
}
static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
struct sock *sk = sock->sk;
@@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
goto done;
}
- *addr_len = sizeof(*haddr);
haddr->hci_family = AF_BLUETOOTH;
haddr->hci_dev = hdev->id;
haddr->hci_channel= hci_pi(sk)->channel;
+ err = sizeof(*haddr);
done:
release_sock(sk);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index fc6615d59165..9b7907ebfa01 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7156,7 +7156,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
hcon = hci_connect_le(hdev, dst, dst_type,
chan->sec_level,
HCI_LE_CONN_TIMEOUT,
- HCI_ROLE_SLAVE);
+ HCI_ROLE_SLAVE, NULL);
else
hcon = hci_connect_le_scan(hdev, dst, dst_type,
chan->sec_level,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 67a8642f57ea..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -358,7 +358,7 @@ done:
}
static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
struct sock *sk = sock->sk;
@@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_l2);
la->l2_psm = chan->psm;
@@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
la->l2_bdaddr_type = chan->src_type;
}
- return 0;
+ return sizeof(struct sockaddr_l2);
}
static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 6e9fc86d8daf..8a80d48d89c4 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4801,6 +4801,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
case MGMT_LTK_P256_DEBUG:
authenticated = 0x00;
type = SMP_LTK_P256_DEBUG;
+ /* fall through */
default:
continue;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1aaccf637479..d606e9212291 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -221,6 +221,7 @@ static void __rfcomm_sock_close(struct sock *sk)
case BT_CONFIG:
case BT_CONNECTED:
rfcomm_dlc_close(d, 0);
+ /* fall through */
default:
sock_set_flag(sk, SOCK_ZAPPED);
@@ -533,7 +534,7 @@ done:
return err;
}
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
{
struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
struct sock *sk = sock->sk;
@@ -552,8 +553,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
else
bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
- *len = sizeof(struct sockaddr_rc);
- return 0;
+ return sizeof(struct sockaddr_rc);
}
static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 5f3074cb6b4d..5e44d842cc5d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -210,8 +210,8 @@ static ssize_t show_channel(struct device *tty_dev, struct device_attribute *att
return sprintf(buf, "%d\n", dev->channel);
}
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(channel, 0444, show_channel, NULL);
static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
struct rfcomm_dlc *dlc)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 08df57665e1f..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -680,7 +680,7 @@ done:
}
static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
@@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
BT_DBG("sock %p, sk %p", sock, sk);
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_sco);
if (peer)
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
else
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
- return 0;
+ return sizeof(struct sockaddr_sco);
}
static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 01117ae84f1d..a2ddae2f37d7 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2296,8 +2296,14 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
else
sec_level = authreq_to_seclevel(auth);
- if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK))
+ if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) {
+ /* If link is already encrypted with sufficient security we
+ * still need refresh encryption as per Core Spec 5.0 Vol 3,
+ * Part H 2.4.6
+ */
+ smp_ltk_encrypt(conn, hcon->sec_level);
return 0;
+ }
if (sec_level > hcon->pending_sec_level)
hcon->pending_sec_level = sec_level;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 6bf06e756df2..671d13c10f6f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
switch (event) {
case NETDEV_CHANGEMTU:
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
break;
case NETDEV_CHANGEADDR:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 1285ca30ab0a..e682a668ce57 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -224,11 +224,11 @@ static void br_get_stats64(struct net_device *dev,
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- if (new_mtu > br_min_mtu(br))
- return -EINVAL;
dev->mtu = new_mtu;
+ /* this flag will be cleared if the MTU was automatically adjusted */
+ br->mtu_set_by_user = true;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* remember the MTU in the rtable for PMTU */
dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9ba4ed65c52b..82c1a6f430b3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -425,22 +425,31 @@ int br_del_bridge(struct net *net, const char *name)
}
/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
-int br_min_mtu(const struct net_bridge *br)
+static int br_mtu_min(const struct net_bridge *br)
{
const struct net_bridge_port *p;
- int mtu = 0;
+ int ret_mtu = 0;
+ list_for_each_entry(p, &br->port_list, list)
+ if (!ret_mtu || ret_mtu > p->dev->mtu)
+ ret_mtu = p->dev->mtu;
+
+ return ret_mtu ? ret_mtu : ETH_DATA_LEN;
+}
+
+void br_mtu_auto_adjust(struct net_bridge *br)
+{
ASSERT_RTNL();
- if (list_empty(&br->port_list))
- mtu = ETH_DATA_LEN;
- else {
- list_for_each_entry(p, &br->port_list, list) {
- if (!mtu || p->dev->mtu < mtu)
- mtu = p->dev->mtu;
- }
- }
- return mtu;
+ /* if the bridge MTU was manually configured don't mess with it */
+ if (br->mtu_set_by_user)
+ return;
+
+ /* change to the minimum MTU and clear the flag which was set by
+ * the bridge ndo_change_mtu callback
+ */
+ dev_set_mtu(br->dev, br_mtu_min(br));
+ br->mtu_set_by_user = false;
}
static void br_set_gso_limits(struct net_bridge *br)
@@ -594,7 +603,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (changed_addr)
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
br_set_gso_limits(br);
kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -641,7 +650,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
*/
del_nbp(p);
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
br_set_gso_limits(br);
spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 27f1d4f2114a..9b16eaf33819 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -214,7 +214,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
- goto inhdr_error;
+ goto csum_error;
len = ntohs(iph->tot_len);
if (skb->len < len) {
@@ -236,6 +236,8 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
*/
return 0;
+csum_error:
+ __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8e13a64d8c99..a7cb3ece5031 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -410,6 +410,7 @@ struct net_bridge {
int offload_fwd_mark;
#endif
bool neigh_suppress_enabled;
+ bool mtu_set_by_user;
struct hlist_head fdb_list;
};
@@ -578,7 +579,7 @@ int br_del_bridge(struct net *net, const char *name);
int br_add_if(struct net_bridge *br, struct net_device *dev,
struct netlink_ext_ack *extack);
int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_min_mtu(const struct net_bridge *br);
+void br_mtu_auto_adjust(struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
netdev_features_t features);
void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index b1be0dcfba6b..0318a69888d4 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -893,7 +893,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
static struct bin_attribute bridge_forward = {
.attr = { .name = SYSFS_BRIDGE_FDB,
- .mode = S_IRUGO, },
+ .mode = 0444, },
.read = brforward_read,
};
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 126a8ea73c96..fd31ad83ec7b 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -44,7 +44,7 @@ static int store_##_name(struct net_bridge_port *p, unsigned long v) \
{ \
return store_flag(p, v, _mask); \
} \
-static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \
+static BRPORT_ATTR(_name, 0644, \
show_##_name, store_##_name)
static int store_flag(struct net_bridge_port *p, unsigned long v,
@@ -71,7 +71,7 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
return sprintf(buf, "%d\n", p->path_cost);
}
-static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(path_cost, 0644,
show_path_cost, br_stp_set_path_cost);
static ssize_t show_priority(struct net_bridge_port *p, char *buf)
@@ -79,91 +79,91 @@ static ssize_t show_priority(struct net_bridge_port *p, char *buf)
return sprintf(buf, "%d\n", p->priority);
}
-static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(priority, 0644,
show_priority, br_stp_set_port_priority);
static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
{
return br_show_bridge_id(buf, &p->designated_root);
}
-static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL);
+static BRPORT_ATTR(designated_root, 0444, show_designated_root, NULL);
static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf)
{
return br_show_bridge_id(buf, &p->designated_bridge);
}
-static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL);
+static BRPORT_ATTR(designated_bridge, 0444, show_designated_bridge, NULL);
static ssize_t show_designated_port(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->designated_port);
}
-static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL);
+static BRPORT_ATTR(designated_port, 0444, show_designated_port, NULL);
static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->designated_cost);
}
-static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL);
+static BRPORT_ATTR(designated_cost, 0444, show_designated_cost, NULL);
static ssize_t show_port_id(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "0x%x\n", p->port_id);
}
-static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL);
+static BRPORT_ATTR(port_id, 0444, show_port_id, NULL);
static ssize_t show_port_no(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "0x%x\n", p->port_no);
}
-static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL);
+static BRPORT_ATTR(port_no, 0444, show_port_no, NULL);
static ssize_t show_change_ack(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->topology_change_ack);
}
-static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL);
+static BRPORT_ATTR(change_ack, 0444, show_change_ack, NULL);
static ssize_t show_config_pending(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->config_pending);
}
-static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL);
+static BRPORT_ATTR(config_pending, 0444, show_config_pending, NULL);
static ssize_t show_port_state(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->state);
}
-static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL);
+static BRPORT_ATTR(state, 0444, show_port_state, NULL);
static ssize_t show_message_age_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer));
}
-static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL);
+static BRPORT_ATTR(message_age_timer, 0444, show_message_age_timer, NULL);
static ssize_t show_forward_delay_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer));
}
-static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL);
+static BRPORT_ATTR(forward_delay_timer, 0444, show_forward_delay_timer, NULL);
static ssize_t show_hold_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer));
}
-static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+static BRPORT_ATTR(hold_timer, 0444, show_hold_timer, NULL);
static int store_flush(struct net_bridge_port *p, unsigned long v)
{
br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry
return 0;
}
-static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+static BRPORT_ATTR(flush, 0200, NULL, store_flush);
static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
{
@@ -179,7 +179,7 @@ static int store_group_fwd_mask(struct net_bridge_port *p,
return 0;
}
-static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask,
store_group_fwd_mask);
BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
@@ -204,7 +204,7 @@ static int store_multicast_router(struct net_bridge_port *p,
{
return br_multicast_set_port_router(p, v);
}
-static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
+static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
store_multicast_router);
BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 51935270c651..9896f4975353 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -168,6 +168,8 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
masterv = br_vlan_find(vg, vid);
if (WARN_ON(!masterv))
return NULL;
+ refcount_set(&masterv->refcnt, 1);
+ return masterv;
}
refcount_inc(&masterv->refcnt);
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 225d1668dfdd..f212447794bd 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -5,7 +5,7 @@
menuconfig NF_TABLES_BRIDGE
depends on BRIDGE && NETFILTER && NF_TABLES
select NETFILTER_FAMILY_BRIDGE
- tristate "Ethernet Bridge nf_tables support"
+ bool "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 2f28e16de6c7..4bc758dd4a8c 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,7 +3,6 @@
# Makefile for the netfilter modules for Link Layer filtering on a bridge.
#
-obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index ce7152a12bd8..620e54f08296 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -172,18 +172,69 @@ ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par)
return true;
}
+static bool poolsize_invalid(const struct ebt_mac_wormhash *w)
+{
+ return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple));
+}
+
+static bool wormhash_offset_invalid(int off, unsigned int len)
+{
+ if (off == 0) /* not present */
+ return false;
+
+ if (off < (int)sizeof(struct ebt_among_info) ||
+ off % __alignof__(struct ebt_mac_wormhash))
+ return true;
+
+ off += sizeof(struct ebt_mac_wormhash);
+
+ return off > len;
+}
+
+static bool wormhash_sizes_valid(const struct ebt_mac_wormhash *wh, int a, int b)
+{
+ if (a == 0)
+ a = sizeof(struct ebt_among_info);
+
+ return ebt_mac_wormhash_size(wh) + a == b;
+}
+
static int ebt_among_mt_check(const struct xt_mtchk_param *par)
{
const struct ebt_among_info *info = par->matchinfo;
const struct ebt_entry_match *em =
container_of(par->matchinfo, const struct ebt_entry_match, data);
- int expected_length = sizeof(struct ebt_among_info);
+ unsigned int expected_length = sizeof(struct ebt_among_info);
const struct ebt_mac_wormhash *wh_dst, *wh_src;
int err;
+ if (expected_length > em->match_size)
+ return -EINVAL;
+
+ if (wormhash_offset_invalid(info->wh_dst_ofs, em->match_size) ||
+ wormhash_offset_invalid(info->wh_src_ofs, em->match_size))
+ return -EINVAL;
+
wh_dst = ebt_among_wh_dst(info);
- wh_src = ebt_among_wh_src(info);
+ if (poolsize_invalid(wh_dst))
+ return -EINVAL;
+
expected_length += ebt_mac_wormhash_size(wh_dst);
+ if (expected_length > em->match_size)
+ return -EINVAL;
+
+ wh_src = ebt_among_wh_src(info);
+ if (poolsize_invalid(wh_src))
+ return -EINVAL;
+
+ if (info->wh_src_ofs < info->wh_dst_ofs) {
+ if (!wormhash_sizes_valid(wh_src, info->wh_src_ofs, info->wh_dst_ofs))
+ return -EINVAL;
+ } else {
+ if (!wormhash_sizes_valid(wh_dst, info->wh_dst_ofs, info->wh_src_ofs))
+ return -EINVAL;
+ }
+
expected_length += ebt_mac_wormhash_size(wh_src);
if (em->match_size != EBT_ALIGN(expected_length)) {
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 2b46c50abce0..ffaa8ce2e724 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -19,9 +19,18 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_ip.h>
-struct tcpudphdr {
- __be16 src;
- __be16 dst;
+union pkthdr {
+ struct {
+ __be16 src;
+ __be16 dst;
+ } tcpudphdr;
+ struct {
+ u8 type;
+ u8 code;
+ } icmphdr;
+ struct {
+ u8 type;
+ } igmphdr;
};
static bool
@@ -30,8 +39,8 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct ebt_ip_info *info = par->matchinfo;
const struct iphdr *ih;
struct iphdr _iph;
- const struct tcpudphdr *pptr;
- struct tcpudphdr _ports;
+ const union pkthdr *pptr;
+ union pkthdr _pkthdr;
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL)
@@ -50,29 +59,43 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (info->bitmask & EBT_IP_PROTO) {
if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol))
return false;
- if (!(info->bitmask & EBT_IP_DPORT) &&
- !(info->bitmask & EBT_IP_SPORT))
+ if (!(info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT |
+ EBT_IP_ICMP | EBT_IP_IGMP)))
return true;
if (ntohs(ih->frag_off) & IP_OFFSET)
return false;
+
+ /* min icmp/igmp headersize is 4, so sizeof(_pkthdr) is ok. */
pptr = skb_header_pointer(skb, ih->ihl*4,
- sizeof(_ports), &_ports);
+ sizeof(_pkthdr), &_pkthdr);
if (pptr == NULL)
return false;
if (info->bitmask & EBT_IP_DPORT) {
- u32 dst = ntohs(pptr->dst);
+ u32 dst = ntohs(pptr->tcpudphdr.dst);
if (NF_INVF(info, EBT_IP_DPORT,
dst < info->dport[0] ||
dst > info->dport[1]))
return false;
}
if (info->bitmask & EBT_IP_SPORT) {
- u32 src = ntohs(pptr->src);
+ u32 src = ntohs(pptr->tcpudphdr.src);
if (NF_INVF(info, EBT_IP_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
return false;
}
+ if ((info->bitmask & EBT_IP_ICMP) &&
+ NF_INVF(info, EBT_IP_ICMP,
+ pptr->icmphdr.type < info->icmp_type[0] ||
+ pptr->icmphdr.type > info->icmp_type[1] ||
+ pptr->icmphdr.code < info->icmp_code[0] ||
+ pptr->icmphdr.code > info->icmp_code[1]))
+ return false;
+ if ((info->bitmask & EBT_IP_IGMP) &&
+ NF_INVF(info, EBT_IP_IGMP,
+ pptr->igmphdr.type < info->igmp_type[0] ||
+ pptr->igmphdr.type > info->igmp_type[1]))
+ return false;
}
return true;
}
@@ -101,6 +124,21 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1])
return -EINVAL;
+ if (info->bitmask & EBT_IP_ICMP) {
+ if ((info->invflags & EBT_IP_PROTO) ||
+ info->protocol != IPPROTO_ICMP)
+ return -EINVAL;
+ if (info->icmp_type[0] > info->icmp_type[1] ||
+ info->icmp_code[0] > info->icmp_code[1])
+ return -EINVAL;
+ }
+ if (info->bitmask & EBT_IP_IGMP) {
+ if ((info->invflags & EBT_IP_PROTO) ||
+ info->protocol != IPPROTO_IGMP)
+ return -EINVAL;
+ if (info->igmp_type[0] > info->igmp_type[1])
+ return -EINVAL;
+ }
return 0;
}
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 3140eb912d7e..47ba98db145d 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -153,8 +153,6 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
{
const struct ebt_stp_info *info = par->matchinfo;
- const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
- const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
const struct ebt_entry *e = par->entryinfo;
if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
@@ -162,8 +160,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
/* Make sure the match only receives stp frames */
if (!par->nft_compat &&
- (!ether_addr_equal(e->destmac, bridge_ula) ||
- !ether_addr_equal(e->destmsk, msk) ||
+ (!ether_addr_equal(e->destmac, eth_stp_addr) ||
+ !is_broadcast_ether_addr(e->destmsk) ||
!(e->bitmask & EBT_DESTMAC)))
return -EINVAL;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 02c4b409d317..032e0fe45940 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -223,9 +223,7 @@ unsigned int ebt_do_table(struct sk_buff *skb,
return NF_DROP;
}
- /* increase counter */
- (*(counter_base + i)).pcnt++;
- (*(counter_base + i)).bcnt += skb->len;
+ ADD_COUNTER(*(counter_base + i), 1, skb->len);
/* these should only watch: not modify, nor tell us
* what to do with the packet
@@ -358,12 +356,12 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
left - sizeof(struct ebt_entry_match) < m->match_size)
return -EINVAL;
- match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
+ match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision);
if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) {
if (!IS_ERR(match))
module_put(match->me);
request_module("ebt_%s", m->u.name);
- match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
+ match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision);
}
if (IS_ERR(match))
return PTR_ERR(match);
@@ -968,10 +966,9 @@ static void get_counters(const struct ebt_counter *oldcounters,
if (cpu == 0)
continue;
counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
- for (i = 0; i < nentries; i++) {
- counters[i].pcnt += counter_base[i].pcnt;
- counters[i].bcnt += counter_base[i].bcnt;
- }
+ for (i = 0; i < nentries; i++)
+ ADD_COUNTER(counters[i], counter_base[i].pcnt,
+ counter_base[i].bcnt);
}
}
@@ -1324,10 +1321,8 @@ static int do_update_counters(struct net *net, const char *name,
write_lock_bh(&t->lock);
/* we add to the counters of the first cpu */
- for (i = 0; i < num_counters; i++) {
- t->private->counters[i].pcnt += tmp[i].pcnt;
- t->private->counters[i].bcnt += tmp[i].bcnt;
- }
+ for (i = 0; i < num_counters; i++)
+ ADD_COUNTER(t->private->counters[i], tmp[i].pcnt, tmp[i].bcnt);
write_unlock_bh(&t->lock);
ret = 0;
@@ -1355,16 +1350,17 @@ static int update_counters(struct net *net, const void __user *user,
static inline int ebt_obj_to_user(char __user *um, const char *_name,
const char *data, int entrysize,
- int usersize, int datasize)
+ int usersize, int datasize, u8 revision)
{
- char name[EBT_FUNCTION_MAXNAMELEN] = {0};
+ char name[EBT_EXTENSION_MAXNAMELEN] = {0};
- /* ebtables expects 32 bytes long names but xt_match names are 29 bytes
+ /* ebtables expects 31 bytes long names but xt_match names are 29 bytes
* long. Copy 29 bytes and fill remaining bytes with zeroes.
*/
strlcpy(name, _name, sizeof(name));
- if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
- put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
+ if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) ||
+ put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) ||
+ put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) ||
xt_data_to_user(um + entrysize, data, usersize, datasize,
XT_ALIGN(datasize)))
return -EFAULT;
@@ -1377,7 +1373,8 @@ static inline int ebt_match_to_user(const struct ebt_entry_match *m,
{
return ebt_obj_to_user(ubase + ((char *)m - base),
m->u.match->name, m->data, sizeof(*m),
- m->u.match->usersize, m->match_size);
+ m->u.match->usersize, m->match_size,
+ m->u.match->revision);
}
static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w,
@@ -1385,7 +1382,8 @@ static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w,
{
return ebt_obj_to_user(ubase + ((char *)w - base),
w->u.watcher->name, w->data, sizeof(*w),
- w->u.watcher->usersize, w->watcher_size);
+ w->u.watcher->usersize, w->watcher_size,
+ w->u.watcher->revision);
}
static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
@@ -1416,7 +1414,8 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
if (ret != 0)
return ret;
ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t),
- t->u.target->usersize, t->target_size);
+ t->u.target->usersize, t->target_size,
+ t->u.target->revision);
if (ret != 0)
return ret;
@@ -1604,7 +1603,10 @@ struct compat_ebt_replace {
/* struct ebt_entry_match, _target and _watcher have same layout */
struct compat_ebt_entry_mwt {
union {
- char name[EBT_FUNCTION_MAXNAMELEN];
+ struct {
+ char name[EBT_EXTENSION_MAXNAMELEN];
+ u8 revision;
+ };
compat_uptr_t ptr;
} u;
compat_uint_t match_size;
@@ -1641,10 +1643,12 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
int off = ebt_compat_match_offset(match, m->match_size);
compat_uint_t msize = m->match_size - off;
- BUG_ON(off >= m->match_size);
+ if (WARN_ON(off >= m->match_size))
+ return -EINVAL;
- if (copy_to_user(cm->u.name, match->name,
- strlen(match->name) + 1) || put_user(msize, &cm->match_size))
+ if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) ||
+ put_user(match->revision, &cm->u.revision) ||
+ put_user(msize, &cm->match_size))
return -EFAULT;
if (match->compat_to_user) {
@@ -1671,10 +1675,12 @@ static int compat_target_to_user(struct ebt_entry_target *t,
int off = xt_compat_target_offset(target);
compat_uint_t tsize = t->target_size - off;
- BUG_ON(off >= t->target_size);
+ if (WARN_ON(off >= t->target_size))
+ return -EINVAL;
- if (copy_to_user(cm->u.name, target->name,
- strlen(target->name) + 1) || put_user(tsize, &cm->match_size))
+ if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) ||
+ put_user(target->revision, &cm->u.revision) ||
+ put_user(tsize, &cm->match_size))
return -EFAULT;
if (target->compat_to_user) {
@@ -1819,10 +1825,14 @@ static int compat_table_info(const struct ebt_table_info *info,
{
unsigned int size = info->entries_size;
const void *entries = info->entries;
+ int ret;
newinfo->entries_size = size;
- xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
+ ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
+ if (ret)
+ return ret;
+
return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
entries, newinfo);
}
@@ -1902,7 +1912,8 @@ static int ebt_buf_add(struct ebt_entries_buf_state *state,
if (state->buf_kern_start == NULL)
goto count_only;
- BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len);
+ if (WARN_ON(state->buf_kern_offset + sz > state->buf_kern_len))
+ return -EINVAL;
memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz);
@@ -1915,7 +1926,8 @@ static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz)
{
char *b = state->buf_kern_start;
- BUG_ON(b && state->buf_kern_offset > state->buf_kern_len);
+ if (WARN_ON(b && state->buf_kern_offset > state->buf_kern_len))
+ return -EINVAL;
if (b != NULL && sz > 0)
memset(b + state->buf_kern_offset, 0, sz);
@@ -1934,7 +1946,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
struct ebt_entries_buf_state *state,
const unsigned char *base)
{
- char name[EBT_FUNCTION_MAXNAMELEN];
+ char name[EBT_EXTENSION_MAXNAMELEN];
struct xt_match *match;
struct xt_target *wt;
void *dst = NULL;
@@ -1948,7 +1960,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
switch (compat_mwt) {
case EBT_COMPAT_MATCH:
- match = xt_request_find_match(NFPROTO_BRIDGE, name, 0);
+ match = xt_request_find_match(NFPROTO_BRIDGE, name,
+ mwt->u.revision);
if (IS_ERR(match))
return PTR_ERR(match);
@@ -1967,7 +1980,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
break;
case EBT_COMPAT_WATCHER: /* fallthrough */
case EBT_COMPAT_TARGET:
- wt = xt_request_find_target(NFPROTO_BRIDGE, name, 0);
+ wt = xt_request_find_target(NFPROTO_BRIDGE, name,
+ mwt->u.revision);
if (IS_ERR(wt))
return PTR_ERR(wt);
off = xt_compat_target_offset(wt);
@@ -1992,8 +2006,10 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
pad = XT_ALIGN(size_kern) - size_kern;
if (pad > 0 && dst) {
- BUG_ON(state->buf_kern_len <= pad);
- BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad);
+ if (WARN_ON(state->buf_kern_len <= pad))
+ return -EINVAL;
+ if (WARN_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad))
+ return -EINVAL;
memset(dst + size_kern, 0, pad);
}
return off + match_size;
@@ -2043,7 +2059,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
if (ret < 0)
return ret;
- BUG_ON(ret < match32->match_size);
+ if (WARN_ON(ret < match32->match_size))
+ return -EINVAL;
growth += ret - match32->match_size;
growth += ebt_compat_entry_padsize();
@@ -2053,7 +2070,9 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
if (match_kern)
match_kern->match_size = ret;
- WARN_ON(type == EBT_COMPAT_TARGET && size_left);
+ if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+ return -EINVAL;
+
match32 = (struct compat_ebt_entry_mwt *) buf;
}
@@ -2109,6 +2128,19 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
*
* offsets are relative to beginning of struct ebt_entry (i.e., 0).
*/
+ for (i = 0; i < 4 ; ++i) {
+ if (offsets[i] > *total)
+ return -EINVAL;
+
+ if (i < 3 && offsets[i] == *total)
+ return -EINVAL;
+
+ if (i == 0)
+ continue;
+ if (offsets[i-1] > offsets[i])
+ return -EINVAL;
+ }
+
for (i = 0, j = 1 ; j < 4 ; j++, i++) {
struct compat_ebt_entry_mwt *match32;
unsigned int size;
@@ -2140,7 +2172,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
startoff = state->buf_user_offset - startoff;
- BUG_ON(*total < startoff);
+ if (WARN_ON(*total < startoff))
+ return -EINVAL;
*total -= startoff;
return 0;
}
@@ -2245,7 +2278,9 @@ static int compat_do_replace(struct net *net, void __user *user,
xt_compat_lock(NFPROTO_BRIDGE);
- xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+ ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+ if (ret < 0)
+ goto out_unlock;
ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
if (ret < 0)
goto out_unlock;
@@ -2267,7 +2302,8 @@ static int compat_do_replace(struct net *net, void __user *user,
state.buf_kern_len = size64;
ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
- BUG_ON(ret < 0); /* parses same data again */
+ if (WARN_ON(ret < 0))
+ goto out_unlock;
vfree(entries_tmp);
tmp.entries_size = size64;
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
deleted file mode 100644
index 5160cf614176..000000000000
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netfilter_bridge.h>
-#include <net/netfilter/nf_tables.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int
-nft_do_chain_bridge(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
-
- switch (eth_hdr(skb)->h_proto) {
- case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb);
- break;
- case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb);
- break;
- default:
- nft_set_pktinfo_unspec(&pkt, skb);
- break;
- }
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_bridge = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_BRIDGE,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_BR_PRE_ROUTING) |
- (1 << NF_BR_LOCAL_IN) |
- (1 << NF_BR_FORWARD) |
- (1 << NF_BR_LOCAL_OUT) |
- (1 << NF_BR_POST_ROUTING),
- .hooks = {
- [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
- [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
- [NF_BR_FORWARD] = nft_do_chain_bridge,
- [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
- [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
- },
-};
-
-static int __init nf_tables_bridge_init(void)
-{
- return nft_register_chain_type(&filter_bridge);
-}
-
-static void __exit nf_tables_bridge_exit(void)
-{
- nft_unregister_chain_type(&filter_bridge);
-}
-
-module_init(nf_tables_bridge_init);
-module_exit(nf_tables_bridge_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter");
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 6da324550eec..1684ba5b51eb 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -72,7 +72,7 @@ MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>, "
MODULE_ALIAS_NETPROTO(PF_CAN);
static int stats_timer __read_mostly = 1;
-module_param(stats_timer, int, S_IRUGO);
+module_param(stats_timer, int, 0444);
MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
static struct kmem_cache *rcv_cache __read_mostly;
diff --git a/net/can/gw.c b/net/can/gw.c
index 398dd0395ad9..faa3da88a127 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -72,7 +72,7 @@ MODULE_ALIAS(CAN_GW_NAME);
#define CGW_DEFAULT_HOPS 1
static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS;
-module_param(max_hops, uint, S_IRUGO);
+module_param(max_hops, uint, 0444);
MODULE_PARM_DESC(max_hops,
"maximum " CAN_GW_NAME " routing hops for CAN frames "
"(valid values: " __stringify(CGW_MIN_HOPS) "-"
diff --git a/net/can/raw.c b/net/can/raw.c
index f2ecc43376a1..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
}
static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
@@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
addr->can_family = AF_CAN;
addr->can_ifindex = ro->ifindex;
- *len = sizeof(*addr);
-
- return 0;
+ return sizeof(*addr);
}
static int raw_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index b4bded4b5396..12bf49772d24 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -8,6 +8,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
mon_client.o \
cls_lock_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+ striper.o \
debugfs.o \
auth.o auth_none.o \
crypto.o armor.o \
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4d4c82229e9e..584fdbef2088 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -54,7 +54,7 @@ static const struct kernel_param_ops param_ops_supported_features = {
.get = param_get_supported_features,
};
module_param_cb(supported_features, &param_ops_supported_features, NULL,
- S_IRUGO);
+ 0444);
const char *ceph_msg_type_name(int type)
{
@@ -72,6 +72,7 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
case CEPH_MSG_MDS_MAP: return "mds_map";
+ case CEPH_MSG_FS_MAP_USER: return "fs_map_user";
case CEPH_MSG_CLIENT_SESSION: return "client_session";
case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
case CEPH_MSG_CLIENT_REQUEST: return "client_request";
@@ -79,8 +80,13 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_CLIENT_REPLY: return "client_reply";
case CEPH_MSG_CLIENT_CAPS: return "client_caps";
case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+ case CEPH_MSG_CLIENT_QUOTA: return "client_quota";
case CEPH_MSG_CLIENT_SNAP: return "client_snap";
case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+ case CEPH_MSG_POOLOP_REPLY: return "poolop_reply";
+ case CEPH_MSG_POOLOP: return "poolop";
+ case CEPH_MSG_MON_COMMAND: return "mon_command";
+ case CEPH_MSG_MON_COMMAND_ACK: return "mon_command_ack";
case CEPH_MSG_OSD_MAP: return "osd_map";
case CEPH_MSG_OSD_OP: return "osd_op";
case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
@@ -217,7 +223,7 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
if (i == 16)
err = 0;
- dout("parse_fsid ret %d got fsid %pU", err, fsid);
+ dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
return err;
}
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index bf9d079cbafd..02172c408ff2 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -347,10 +347,12 @@ struct key_type key_type_ceph = {
.destroy = ceph_key_destroy,
};
-int ceph_crypto_init(void) {
+int __init ceph_crypto_init(void)
+{
return register_key_type(&key_type_ceph);
}
-void ceph_crypto_shutdown(void) {
+void ceph_crypto_shutdown(void)
+{
unregister_key_type(&key_type_ceph);
}
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 1eef6806aa1a..02952605d121 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -389,7 +389,7 @@ CEPH_DEFINE_SHOW_FUNC(monc_show)
CEPH_DEFINE_SHOW_FUNC(osdc_show)
CEPH_DEFINE_SHOW_FUNC(client_options_show)
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
{
ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
if (!ceph_debugfs_dir)
@@ -418,7 +418,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->monc.debugfs_file = debugfs_create_file("monc",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&monc_show_fops);
@@ -426,7 +426,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->osdc.debugfs_file = debugfs_create_file("osdc",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&osdc_show_fops);
@@ -434,7 +434,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->debugfs_monmap = debugfs_create_file("monmap",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&monmap_show_fops);
@@ -442,7 +442,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->debugfs_osdmap = debugfs_create_file("osdmap",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&osdmap_show_fops);
@@ -450,7 +450,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->debugfs_options = debugfs_create_file("client_options",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&client_options_show_fops);
@@ -477,7 +477,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
#else /* CONFIG_DEBUG_FS */
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
{
return 0;
}
@@ -496,6 +496,3 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
}
#endif /* CONFIG_DEBUG_FS */
-
-EXPORT_SYMBOL(ceph_debugfs_init);
-EXPORT_SYMBOL(ceph_debugfs_cleanup);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 8a4d3758030b..fcb40c12b1f8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -277,7 +277,7 @@ static void _ceph_msgr_exit(void)
ceph_msgr_slab_exit();
}
-int ceph_msgr_init(void)
+int __init ceph_msgr_init(void)
{
if (ceph_msgr_slab_init())
return -ENOMEM;
@@ -299,7 +299,6 @@ int ceph_msgr_init(void)
return -ENOMEM;
}
-EXPORT_SYMBOL(ceph_msgr_init);
void ceph_msgr_exit(void)
{
@@ -307,7 +306,6 @@ void ceph_msgr_exit(void)
_ceph_msgr_exit();
}
-EXPORT_SYMBOL(ceph_msgr_exit);
void ceph_msgr_flush(void)
{
@@ -839,93 +837,112 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
size_t length)
{
struct ceph_msg_data *data = cursor->data;
- struct bio *bio;
+ struct ceph_bio_iter *it = &cursor->bio_iter;
- BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+ cursor->resid = min_t(size_t, length, data->bio_length);
+ *it = data->bio_pos;
+ if (cursor->resid < it->iter.bi_size)
+ it->iter.bi_size = cursor->resid;
- bio = data->bio;
- BUG_ON(!bio);
-
- cursor->resid = min(length, data->bio_length);
- cursor->bio = bio;
- cursor->bvec_iter = bio->bi_iter;
- cursor->last_piece =
- cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
+ BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+ cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
}
static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
size_t *page_offset,
size_t *length)
{
- struct ceph_msg_data *data = cursor->data;
- struct bio *bio;
- struct bio_vec bio_vec;
-
- BUG_ON(data->type != CEPH_MSG_DATA_BIO);
-
- bio = cursor->bio;
- BUG_ON(!bio);
-
- bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
-
- *page_offset = (size_t) bio_vec.bv_offset;
- BUG_ON(*page_offset >= PAGE_SIZE);
- if (cursor->last_piece) /* pagelist offset is always 0 */
- *length = cursor->resid;
- else
- *length = (size_t) bio_vec.bv_len;
- BUG_ON(*length > cursor->resid);
- BUG_ON(*page_offset + *length > PAGE_SIZE);
+ struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
+ cursor->bio_iter.iter);
- return bio_vec.bv_page;
+ *page_offset = bv.bv_offset;
+ *length = bv.bv_len;
+ return bv.bv_page;
}
static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
size_t bytes)
{
- struct bio *bio;
- struct bio_vec bio_vec;
+ struct ceph_bio_iter *it = &cursor->bio_iter;
- BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+ BUG_ON(bytes > cursor->resid);
+ BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
+ cursor->resid -= bytes;
+ bio_advance_iter(it->bio, &it->iter, bytes);
- bio = cursor->bio;
- BUG_ON(!bio);
+ if (!cursor->resid) {
+ BUG_ON(!cursor->last_piece);
+ return false; /* no more data */
+ }
- bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+ if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
+ return false; /* more bytes to process in this segment */
- /* Advance the cursor offset */
+ if (!it->iter.bi_size) {
+ it->bio = it->bio->bi_next;
+ it->iter = it->bio->bi_iter;
+ if (cursor->resid < it->iter.bi_size)
+ it->iter.bi_size = cursor->resid;
+ }
- BUG_ON(cursor->resid < bytes);
- cursor->resid -= bytes;
+ BUG_ON(cursor->last_piece);
+ BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+ cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
+ return true;
+}
+#endif /* CONFIG_BLOCK */
- bio_advance_iter(bio, &cursor->bvec_iter, bytes);
+static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct bio_vec *bvecs = data->bvec_pos.bvecs;
- if (bytes < bio_vec.bv_len)
- return false; /* more bytes to process in this segment */
+ cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size);
+ cursor->bvec_iter = data->bvec_pos.iter;
+ cursor->bvec_iter.bi_size = cursor->resid;
- /* Move on to the next segment, and possibly the next bio */
+ BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+ cursor->last_piece =
+ cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
+}
- if (!cursor->bvec_iter.bi_size) {
- bio = bio->bi_next;
- cursor->bio = bio;
- if (bio)
- cursor->bvec_iter = bio->bi_iter;
- else
- memset(&cursor->bvec_iter, 0,
- sizeof(cursor->bvec_iter));
- }
+static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset,
+ size_t *length)
+{
+ struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs,
+ cursor->bvec_iter);
+
+ *page_offset = bv.bv_offset;
+ *length = bv.bv_len;
+ return bv.bv_page;
+}
+
+static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
+
+ BUG_ON(bytes > cursor->resid);
+ BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
+ cursor->resid -= bytes;
+ bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes);
- if (!cursor->last_piece) {
- BUG_ON(!cursor->resid);
- BUG_ON(!bio);
- /* A short read is OK, so use <= rather than == */
- if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
- cursor->last_piece = true;
+ if (!cursor->resid) {
+ BUG_ON(!cursor->last_piece);
+ return false; /* no more data */
}
+ if (!bytes || cursor->bvec_iter.bi_bvec_done)
+ return false; /* more bytes to process in this segment */
+
+ BUG_ON(cursor->last_piece);
+ BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+ cursor->last_piece =
+ cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
return true;
}
-#endif /* CONFIG_BLOCK */
/*
* For a page array, a piece comes from the first page in the array
@@ -1110,6 +1127,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
ceph_msg_data_bio_cursor_init(cursor, length);
break;
#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_BVECS:
+ ceph_msg_data_bvecs_cursor_init(cursor, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
/* BUG(); */
@@ -1158,14 +1178,19 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
page = ceph_msg_data_bio_next(cursor, page_offset, length);
break;
#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_BVECS:
+ page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
page = NULL;
break;
}
+
BUG_ON(!page);
BUG_ON(*page_offset + *length > PAGE_SIZE);
BUG_ON(!*length);
+ BUG_ON(*length > cursor->resid);
if (last_piece)
*last_piece = cursor->last_piece;
@@ -1194,6 +1219,9 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
new_piece = ceph_msg_data_bio_advance(cursor, bytes);
break;
#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_BVECS:
+ new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
+ break;
case CEPH_MSG_DATA_NONE:
default:
BUG();
@@ -1575,13 +1603,18 @@ static int write_partial_message_data(struct ceph_connection *con)
* been revoked, so use the zero page.
*/
crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
- while (cursor->resid) {
+ while (cursor->total_resid) {
struct page *page;
size_t page_offset;
size_t length;
bool last_piece;
int ret;
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
page = ceph_msg_data_next(cursor, &page_offset, &length,
&last_piece);
ret = ceph_tcp_sendpage(con->sock, page, page_offset,
@@ -2297,7 +2330,12 @@ static int read_partial_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
- while (cursor->resid) {
+ while (cursor->total_resid) {
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
if (ret <= 0) {
@@ -3262,16 +3300,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
#ifdef CONFIG_BLOCK
-void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
- size_t length)
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
+ u32 length)
{
struct ceph_msg_data *data;
- BUG_ON(!bio);
-
data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
BUG_ON(!data);
- data->bio = bio;
+ data->bio_pos = *bio_pos;
data->bio_length = length;
list_add_tail(&data->links, &msg->data);
@@ -3280,6 +3316,20 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
EXPORT_SYMBOL(ceph_msg_data_add_bio);
#endif /* CONFIG_BLOCK */
+void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
+ struct ceph_bvec_iter *bvec_pos)
+{
+ struct ceph_msg_data *data;
+
+ data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
+ BUG_ON(!data);
+ data->bvec_pos = *bvec_pos;
+
+ list_add_tail(&data->links, &msg->data);
+ msg->data_length += bvec_pos->iter.bi_size;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+
/*
* construct a new message with given type, size
* the new msg has a ref count of 1.
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 1547107f4854..b3dac24412d3 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -60,7 +60,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
num_mon = ceph_decode_32(&p);
ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
- if (num_mon >= CEPH_MAX_MON)
+ if (num_mon > CEPH_MAX_MON)
goto bad;
m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS);
if (m == NULL)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 2814dba5902d..ea2a6c9fb7ce 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -20,6 +20,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/pagelist.h>
+#include <linux/ceph/striper.h>
#define OSD_OPREPLY_FRONT_LEN 512
@@ -103,13 +104,12 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
u64 *objnum, u64 *objoff, u64 *objlen)
{
u64 orig_len = *plen;
- int r;
+ u32 xlen;
/* object extent? */
- r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
- objoff, objlen);
- if (r < 0)
- return r;
+ ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+ objoff, &xlen);
+ *objlen = xlen;
if (*objlen < orig_len) {
*plen = *objlen;
dout(" skipping last %llu, final file extent %llu~%llu\n",
@@ -117,7 +117,6 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
}
dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
-
return 0;
}
@@ -148,14 +147,22 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
#ifdef CONFIG_BLOCK
static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
- struct bio *bio, size_t bio_length)
+ struct ceph_bio_iter *bio_pos,
+ u32 bio_length)
{
osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
- osd_data->bio = bio;
+ osd_data->bio_pos = *bio_pos;
osd_data->bio_length = bio_length;
}
#endif /* CONFIG_BLOCK */
+static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
+ struct ceph_bvec_iter *bvec_pos)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_BVECS;
+ osd_data->bvec_pos = *bvec_pos;
+}
+
#define osd_req_op_data(oreq, whch, typ, fld) \
({ \
struct ceph_osd_request *__oreq = (oreq); \
@@ -218,16 +225,29 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
#ifdef CONFIG_BLOCK
void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
- unsigned int which, struct bio *bio, size_t bio_length)
+ unsigned int which,
+ struct ceph_bio_iter *bio_pos,
+ u32 bio_length)
{
struct ceph_osd_data *osd_data;
osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
- ceph_osd_data_bio_init(osd_data, bio, bio_length);
+ ceph_osd_data_bio_init(osd_data, bio_pos, bio_length);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
#endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ struct ceph_bvec_iter *bvec_pos)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_bvecs_init(osd_data, bvec_pos);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+
static void osd_req_op_cls_request_info_pagelist(
struct ceph_osd_request *osd_req,
unsigned int which, struct ceph_pagelist *pagelist)
@@ -265,6 +285,23 @@ void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
}
EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
+void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ struct bio_vec *bvecs, u32 bytes)
+{
+ struct ceph_osd_data *osd_data;
+ struct ceph_bvec_iter it = {
+ .bvecs = bvecs,
+ .iter = { .bi_size = bytes },
+ };
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+ ceph_osd_data_bvecs_init(osd_data, &it);
+ osd_req->r_ops[which].cls.indata_len += bytes;
+ osd_req->r_ops[which].indata_len += bytes;
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs);
+
void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
unsigned int which, struct page **pages, u64 length,
u32 alignment, bool pages_from_pool, bool own_pages)
@@ -290,6 +327,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
case CEPH_OSD_DATA_TYPE_BIO:
return (u64)osd_data->bio_length;
#endif /* CONFIG_BLOCK */
+ case CEPH_OSD_DATA_TYPE_BVECS:
+ return osd_data->bvec_pos.iter.bi_size;
default:
WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
return 0;
@@ -828,8 +867,10 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
#ifdef CONFIG_BLOCK
} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
- ceph_msg_data_add_bio(msg, osd_data->bio, length);
+ ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length);
#endif
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
+ ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
} else {
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
}
@@ -5065,7 +5106,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
}
EXPORT_SYMBOL(ceph_osdc_writepages);
-int ceph_osdc_setup(void)
+int __init ceph_osdc_setup(void)
{
size_t size = sizeof(struct ceph_osd_request) +
CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
@@ -5076,7 +5117,6 @@ int ceph_osdc_setup(void)
return ceph_osd_request_cache ? 0 : -ENOMEM;
}
-EXPORT_SYMBOL(ceph_osdc_setup);
void ceph_osdc_cleanup(void)
{
@@ -5084,7 +5124,6 @@ void ceph_osdc_cleanup(void)
kmem_cache_destroy(ceph_osd_request_cache);
ceph_osd_request_cache = NULL;
}
-EXPORT_SYMBOL(ceph_osdc_cleanup);
/*
* handle incoming message
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 0da27c66349a..9645ffd6acfb 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -4,7 +4,6 @@
#include <linux/module.h>
#include <linux/slab.h>
-#include <asm/div64.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/osdmap.h>
@@ -2141,76 +2140,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
}
/*
- * calculate file layout from given offset, length.
- * fill in correct oid, logical length, and object extent
- * offset, length.
- *
- * for now, we write only a single su, until we can
- * pass a stride back to the caller.
- */
-int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
- u64 off, u64 len,
- u64 *ono,
- u64 *oxoff, u64 *oxlen)
-{
- u32 osize = layout->object_size;
- u32 su = layout->stripe_unit;
- u32 sc = layout->stripe_count;
- u32 bl, stripeno, stripepos, objsetno;
- u32 su_per_object;
- u64 t, su_offset;
-
- dout("mapping %llu~%llu osize %u fl_su %u\n", off, len,
- osize, su);
- if (su == 0 || sc == 0)
- goto invalid;
- su_per_object = osize / su;
- if (su_per_object == 0)
- goto invalid;
- dout("osize %u / su %u = su_per_object %u\n", osize, su,
- su_per_object);
-
- if ((su & ~PAGE_MASK) != 0)
- goto invalid;
-
- /* bl = *off / su; */
- t = off;
- do_div(t, su);
- bl = t;
- dout("off %llu / su %u = bl %u\n", off, su, bl);
-
- stripeno = bl / sc;
- stripepos = bl % sc;
- objsetno = stripeno / su_per_object;
-
- *ono = objsetno * sc + stripepos;
- dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono);
-
- /* *oxoff = *off % layout->fl_stripe_unit; # offset in su */
- t = off;
- su_offset = do_div(t, su);
- *oxoff = su_offset + (stripeno % su_per_object) * su;
-
- /*
- * Calculate the length of the extent being written to the selected
- * object. This is the minimum of the full length requested (len) or
- * the remainder of the current stripe being written to.
- */
- *oxlen = min_t(u64, len, su - su_offset);
-
- dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
- return 0;
-
-invalid:
- dout(" invalid layout\n");
- *ono = 0;
- *oxoff = 0;
- *oxlen = 0;
- return -EINVAL;
-}
-EXPORT_SYMBOL(ceph_calc_file_object_mapping);
-
-/*
* Map an object into a PG.
*
* Should only be called with target_oid and target_oloc (as opposed to
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
new file mode 100644
index 000000000000..c36462dc86b7
--- /dev/null
+++ b/net/ceph/striper.c
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/math64.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/striper.h>
+#include <linux/ceph/types.h>
+
+/*
+ * Map a file extent to a stripe unit within an object.
+ * Fill in objno, offset into object, and object extent length (i.e. the
+ * number of bytes mapped, less than or equal to @l->stripe_unit).
+ *
+ * Example for stripe_count = 3, stripes_per_object = 4:
+ *
+ * blockno | 0 3 6 9 | 1 4 7 10 | 2 5 8 11 | 12 15 18 21 | 13 16 19
+ * stripeno | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 4 5 6 7 | 4 5 6
+ * stripepos | 0 | 1 | 2 | 0 | 1
+ * objno | 0 | 1 | 2 | 3 | 4
+ * objsetno | 0 | 1
+ */
+void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
+ u64 off, u64 len,
+ u64 *objno, u64 *objoff, u32 *xlen)
+{
+ u32 stripes_per_object = l->object_size / l->stripe_unit;
+ u64 blockno; /* which su in the file (i.e. globally) */
+ u32 blockoff; /* offset into su */
+ u64 stripeno; /* which stripe */
+ u32 stripepos; /* which su in the stripe,
+ which object in the object set */
+ u64 objsetno; /* which object set */
+ u32 objsetpos; /* which stripe in the object set */
+
+ blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
+ stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
+ objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
+
+ *objno = objsetno * l->stripe_count + stripepos;
+ *objoff = objsetpos * l->stripe_unit + blockoff;
+ *xlen = min_t(u64, len, l->stripe_unit - blockoff);
+}
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
+
+/*
+ * Return the last extent with given objno (@object_extents is sorted
+ * by objno). If not found, return NULL and set @add_pos so that the
+ * new extent can be added with list_add(add_pos, new_ex).
+ */
+static struct ceph_object_extent *
+lookup_last(struct list_head *object_extents, u64 objno,
+ struct list_head **add_pos)
+{
+ struct list_head *pos;
+
+ list_for_each_prev(pos, object_extents) {
+ struct ceph_object_extent *ex =
+ list_entry(pos, typeof(*ex), oe_item);
+
+ if (ex->oe_objno == objno)
+ return ex;
+
+ if (ex->oe_objno < objno)
+ break;
+ }
+
+ *add_pos = pos;
+ return NULL;
+}
+
+static struct ceph_object_extent *
+lookup_containing(struct list_head *object_extents, u64 objno,
+ u64 objoff, u32 xlen)
+{
+ struct ceph_object_extent *ex;
+
+ list_for_each_entry(ex, object_extents, oe_item) {
+ if (ex->oe_objno == objno &&
+ ex->oe_off <= objoff &&
+ ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
+ return ex;
+
+ if (ex->oe_objno > objno)
+ break;
+ }
+
+ return NULL;
+}
+
+/*
+ * Map a file extent to a sorted list of object extents.
+ *
+ * We want only one (or as few as possible) object extents per object.
+ * Adjacent object extents will be merged together, each returned object
+ * extent may reverse map to multiple different file extents.
+ *
+ * Call @alloc_fn for each new object extent and @action_fn for each
+ * mapped stripe unit, whether it was merged into an already allocated
+ * object extent or started a new object extent.
+ *
+ * Newly allocated object extents are added to @object_extents.
+ * To keep @object_extents sorted, successive calls to this function
+ * must map successive file extents (i.e. the list of file extents that
+ * are mapped using the same @object_extents must be sorted).
+ *
+ * The caller is responsible for @object_extents.
+ */
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+ struct list_head *object_extents,
+ struct ceph_object_extent *alloc_fn(void *arg),
+ void *alloc_arg,
+ ceph_object_extent_fn_t action_fn,
+ void *action_arg)
+{
+ struct ceph_object_extent *last_ex, *ex;
+
+ while (len) {
+ struct list_head *add_pos = NULL;
+ u64 objno, objoff;
+ u32 xlen;
+
+ ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+ &xlen);
+
+ last_ex = lookup_last(object_extents, objno, &add_pos);
+ if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
+ ex = alloc_fn(alloc_arg);
+ if (!ex)
+ return -ENOMEM;
+
+ ex->oe_objno = objno;
+ ex->oe_off = objoff;
+ ex->oe_len = xlen;
+ if (action_fn)
+ action_fn(ex, xlen, action_arg);
+
+ if (!last_ex)
+ list_add(&ex->oe_item, add_pos);
+ else
+ list_add(&ex->oe_item, &last_ex->oe_item);
+ } else {
+ last_ex->oe_len += xlen;
+ if (action_fn)
+ action_fn(last_ex, xlen, action_arg);
+ }
+
+ off += xlen;
+ len -= xlen;
+ }
+
+ for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
+ ex = list_next_entry(last_ex, oe_item);
+ &ex->oe_item != object_extents;
+ last_ex = ex, ex = list_next_entry(ex, oe_item)) {
+ if (last_ex->oe_objno > ex->oe_objno ||
+ (last_ex->oe_objno == ex->oe_objno &&
+ last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
+ WARN(1, "%s: object_extents list not sorted!\n",
+ __func__);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ceph_file_to_extents);
+
+/*
+ * A stripped down, non-allocating version of ceph_file_to_extents(),
+ * for when @object_extents is already populated.
+ */
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+ struct list_head *object_extents,
+ ceph_object_extent_fn_t action_fn,
+ void *action_arg)
+{
+ while (len) {
+ struct ceph_object_extent *ex;
+ u64 objno, objoff;
+ u32 xlen;
+
+ ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+ &xlen);
+
+ ex = lookup_containing(object_extents, objno, objoff, xlen);
+ if (!ex) {
+ WARN(1, "%s: objno %llu %llu~%u not found!\n",
+ __func__, objno, objoff, xlen);
+ return -EINVAL;
+ }
+
+ action_fn(ex, xlen, action_arg);
+
+ off += xlen;
+ len -= xlen;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ceph_iterate_extents);
+
+/*
+ * Reverse map an object extent to a sorted list of file extents.
+ *
+ * On success, the caller is responsible for:
+ *
+ * kfree(file_extents)
+ */
+int ceph_extent_to_file(struct ceph_file_layout *l,
+ u64 objno, u64 objoff, u64 objlen,
+ struct ceph_file_extent **file_extents,
+ u32 *num_file_extents)
+{
+ u32 stripes_per_object = l->object_size / l->stripe_unit;
+ u64 blockno; /* which su */
+ u32 blockoff; /* offset into su */
+ u64 stripeno; /* which stripe */
+ u32 stripepos; /* which su in the stripe,
+ which object in the object set */
+ u64 objsetno; /* which object set */
+ u32 i = 0;
+
+ if (!objlen) {
+ *file_extents = NULL;
+ *num_file_extents = 0;
+ return 0;
+ }
+
+ *num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
+ DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
+ *file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
+ GFP_NOIO);
+ if (!*file_extents)
+ return -ENOMEM;
+
+ div_u64_rem(objoff, l->stripe_unit, &blockoff);
+ while (objlen) {
+ u64 off, len;
+
+ objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
+ stripeno = div_u64(objoff, l->stripe_unit) +
+ objsetno * stripes_per_object;
+ blockno = stripeno * l->stripe_count + stripepos;
+ off = blockno * l->stripe_unit + blockoff;
+ len = min_t(u64, objlen, l->stripe_unit - blockoff);
+
+ (*file_extents)[i].fe_off = off;
+ (*file_extents)[i].fe_len = len;
+
+ blockoff = 0;
+ objoff += len;
+ objlen -= len;
+ i++;
+ }
+
+ BUG_ON(i != *num_file_extents);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_extent_to_file);
diff --git a/net/compat.c b/net/compat.c
index 22381719718c..5ae7437d3853 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -383,8 +383,8 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
return sock_setsockopt(sock, level, optname, optval, optlen);
}
-COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
- char __user *, optval, unsigned int, optlen)
+static int __compat_sys_setsockopt(int fd, int level, int optname,
+ char __user *optval, unsigned int optlen)
{
int err;
struct socket *sock = sockfd_lookup(fd, &err);
@@ -410,6 +410,12 @@ COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
return err;
}
+COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
+ char __user *, optval, unsigned int, optlen)
+{
+ return __compat_sys_setsockopt(fd, level, optname, optval, optlen);
+}
+
static int do_get_sock_timeout(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -503,8 +509,9 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta
}
EXPORT_SYMBOL(compat_sock_get_timestampns);
-COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
- char __user *, optval, int __user *, optlen)
+static int __compat_sys_getsockopt(int fd, int level, int optname,
+ char __user *optval,
+ int __user *optlen)
{
int err;
struct socket *sock = sockfd_lookup(fd, &err);
@@ -530,6 +537,12 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
return err;
}
+COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
+ char __user *, optval, int __user *, optlen)
+{
+ return __compat_sys_getsockopt(fd, level, optname, optval, optlen);
+}
+
struct compat_group_req {
__u32 gr_interface;
struct __kernel_sockaddr_storage gr_group
@@ -734,38 +747,72 @@ static unsigned char nas[21] = {
};
#undef AL
-COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags)
+static inline long __compat_sys_sendmsg(int fd,
+ struct compat_msghdr __user *msg,
+ unsigned int flags)
{
- return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+ return __sys_sendmsg(fd, (struct user_msghdr __user *)msg,
+ flags | MSG_CMSG_COMPAT, false);
+}
+
+COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg,
+ unsigned int, flags)
+{
+ return __compat_sys_sendmsg(fd, msg, flags);
+}
+
+static inline long __compat_sys_sendmmsg(int fd,
+ struct compat_mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags)
+{
+ return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
+ flags | MSG_CMSG_COMPAT, false);
}
COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
unsigned int, vlen, unsigned int, flags)
{
- return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
- flags | MSG_CMSG_COMPAT);
+ return __compat_sys_sendmmsg(fd, mmsg, vlen, flags);
+}
+
+static inline long __compat_sys_recvmsg(int fd,
+ struct compat_msghdr __user *msg,
+ unsigned int flags)
+{
+ return __sys_recvmsg(fd, (struct user_msghdr __user *)msg,
+ flags | MSG_CMSG_COMPAT, false);
+}
+
+COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg,
+ unsigned int, flags)
+{
+ return __compat_sys_recvmsg(fd, msg, flags);
}
-COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags)
+static inline long __compat_sys_recvfrom(int fd, void __user *buf,
+ compat_size_t len, unsigned int flags,
+ struct sockaddr __user *addr,
+ int __user *addrlen)
{
- return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+ return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr,
+ addrlen);
}
COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags)
{
- return sys_recv(fd, buf, len, flags | MSG_CMSG_COMPAT);
+ return __compat_sys_recvfrom(fd, buf, len, flags, NULL, NULL);
}
COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len,
unsigned int, flags, struct sockaddr __user *, addr,
int __user *, addrlen)
{
- return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen);
+ return __compat_sys_recvfrom(fd, buf, len, flags, addr, addrlen);
}
-COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
- unsigned int, vlen, unsigned int, flags,
- struct compat_timespec __user *, timeout)
+static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ struct compat_timespec __user *timeout)
{
int datagrams;
struct timespec ktspec;
@@ -785,6 +832,13 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
return datagrams;
}
+COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
+ unsigned int, vlen, unsigned int, flags,
+ struct compat_timespec __user *, timeout)
+{
+ return __compat_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
+}
+
COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args)
{
u32 a[AUDITSC_ARGS];
@@ -810,68 +864,72 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args)
switch (call) {
case SYS_SOCKET:
- ret = sys_socket(a0, a1, a[2]);
+ ret = __sys_socket(a0, a1, a[2]);
break;
case SYS_BIND:
- ret = sys_bind(a0, compat_ptr(a1), a[2]);
+ ret = __sys_bind(a0, compat_ptr(a1), a[2]);
break;
case SYS_CONNECT:
- ret = sys_connect(a0, compat_ptr(a1), a[2]);
+ ret = __sys_connect(a0, compat_ptr(a1), a[2]);
break;
case SYS_LISTEN:
- ret = sys_listen(a0, a1);
+ ret = __sys_listen(a0, a1);
break;
case SYS_ACCEPT:
- ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
+ ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
break;
case SYS_GETSOCKNAME:
- ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
+ ret = __sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
break;
case SYS_GETPEERNAME:
- ret = sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2]));
+ ret = __sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2]));
break;
case SYS_SOCKETPAIR:
- ret = sys_socketpair(a0, a1, a[2], compat_ptr(a[3]));
+ ret = __sys_socketpair(a0, a1, a[2], compat_ptr(a[3]));
break;
case SYS_SEND:
- ret = sys_send(a0, compat_ptr(a1), a[2], a[3]);
+ ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], NULL, 0);
break;
case SYS_SENDTO:
- ret = sys_sendto(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), a[5]);
+ ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3],
+ compat_ptr(a[4]), a[5]);
break;
case SYS_RECV:
- ret = compat_sys_recv(a0, compat_ptr(a1), a[2], a[3]);
+ ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3],
+ NULL, NULL);
break;
case SYS_RECVFROM:
- ret = compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3],
- compat_ptr(a[4]), compat_ptr(a[5]));
+ ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3],
+ compat_ptr(a[4]),
+ compat_ptr(a[5]));
break;
case SYS_SHUTDOWN:
- ret = sys_shutdown(a0, a1);
+ ret = __sys_shutdown(a0, a1);
break;
case SYS_SETSOCKOPT:
- ret = compat_sys_setsockopt(a0, a1, a[2],
- compat_ptr(a[3]), a[4]);
+ ret = __compat_sys_setsockopt(a0, a1, a[2],
+ compat_ptr(a[3]), a[4]);
break;
case SYS_GETSOCKOPT:
- ret = compat_sys_getsockopt(a0, a1, a[2],
- compat_ptr(a[3]), compat_ptr(a[4]));
+ ret = __compat_sys_getsockopt(a0, a1, a[2],
+ compat_ptr(a[3]),
+ compat_ptr(a[4]));
break;
case SYS_SENDMSG:
- ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]);
+ ret = __compat_sys_sendmsg(a0, compat_ptr(a1), a[2]);
break;
case SYS_SENDMMSG:
- ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]);
+ ret = __compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]);
break;
case SYS_RECVMSG:
- ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
+ ret = __compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break;
case SYS_RECVMMSG:
- ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
- compat_ptr(a[4]));
+ ret = __compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
+ compat_ptr(a[4]));
break;
case SYS_ACCEPT4:
- ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
+ ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
break;
default:
ret = -EINVAL;
diff --git a/net/core/dev.c b/net/core/dev.c
index d4362befe7e2..969462ebb296 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1027,7 +1027,7 @@ bool dev_valid_name(const char *name)
{
if (*name == '\0')
return false;
- if (strlen(name) >= IFNAMSIZ)
+ if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
return false;
if (!strcmp(name, ".") || !strcmp(name, ".."))
return false;
@@ -1571,6 +1571,27 @@ static void dev_disable_gro_hw(struct net_device *dev)
netdev_WARN(dev, "failed to disable GRO_HW!\n");
}
+const char *netdev_cmd_to_name(enum netdev_cmd cmd)
+{
+#define N(val) \
+ case NETDEV_##val: \
+ return "NETDEV_" __stringify(val);
+ switch (cmd) {
+ N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
+ N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
+ N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
+ N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
+ N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
+ N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
+ N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
+ N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
+ N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+ };
+#undef N
+ return "UNKNOWN_NETDEV_EVENT";
+}
+EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
+
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
@@ -1604,6 +1625,8 @@ int register_netdevice_notifier(struct notifier_block *nb)
struct net *net;
int err;
+ /* Close race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
if (err)
@@ -1626,6 +1649,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
unlock:
rtnl_unlock();
+ up_write(&pernet_ops_rwsem);
return err;
rollback:
@@ -1670,6 +1694,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
struct net *net;
int err;
+ /* Close race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock();
err = raw_notifier_chain_unregister(&netdev_chain, nb);
if (err)
@@ -1687,6 +1713,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
}
unlock:
rtnl_unlock();
+ up_write(&pernet_ops_rwsem);
return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
@@ -2378,7 +2405,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
@@ -2735,7 +2762,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
return 0;
- eth = (struct ethhdr *)skb_mac_header(skb);
+ eth = (struct ethhdr *)skb->data;
type = eth->h_proto;
}
@@ -3278,15 +3305,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
static void skb_update_prio(struct sk_buff *skb)
{
- struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+ const struct netprio_map *map;
+ const struct sock *sk;
+ unsigned int prioidx;
+
+ if (skb->priority)
+ return;
+ map = rcu_dereference_bh(skb->dev->priomap);
+ if (!map)
+ return;
+ sk = skb_to_full_sk(skb);
+ if (!sk)
+ return;
- if (!skb->priority && skb->sk && map) {
- unsigned int prioidx =
- sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
+ prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
- if (prioidx < map->priomap_len)
- skb->priority = map->priomap[prioidx];
- }
+ if (prioidx < map->priomap_len)
+ skb->priority = map->priomap[prioidx];
}
#else
#define skb_update_prio(skb)
@@ -4351,6 +4386,9 @@ int netdev_rx_handler_register(struct net_device *dev,
if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
+ if (dev->priv_flags & IFF_NO_RX_HANDLER)
+ return -EINVAL;
+
/* Note: rx_handler_data must be set before rx_handler */
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
@@ -6396,6 +6434,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
.linking = true,
.upper_info = upper_info,
};
+ struct net_device *master_dev;
int ret = 0;
ASSERT_RTNL();
@@ -6407,11 +6446,14 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (netdev_has_upper_dev(upper_dev, dev))
return -EBUSY;
- if (netdev_has_upper_dev(dev, upper_dev))
- return -EEXIST;
-
- if (master && netdev_master_upper_dev_get(dev))
- return -EBUSY;
+ if (!master) {
+ if (netdev_has_upper_dev(dev, upper_dev))
+ return -EEXIST;
+ } else {
+ master_dev = netdev_master_upper_dev_get(dev);
+ if (master_dev)
+ return master_dev == upper_dev ? -EEXIST : -EBUSY;
+ }
ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
@@ -7542,6 +7584,19 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ /* LRO/HW-GRO features cannot be combined with RX-FCS */
+ if (features & NETIF_F_RXFCS) {
+ if (features & NETIF_F_LRO) {
+ netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
@@ -7613,6 +7668,24 @@ sync_lower:
}
}
+ if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ dev->features = features;
+ err |= vlan_get_rx_ctag_filter_info(dev);
+ } else {
+ vlan_drop_rx_ctag_filter_info(dev);
+ }
+ }
+
+ if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
+ dev->features = features;
+ err |= vlan_get_rx_stag_filter_info(dev);
+ } else {
+ vlan_drop_rx_stag_filter_info(dev);
+ }
+ }
+
dev->features = features;
}
@@ -7998,7 +8071,8 @@ int register_netdev(struct net_device *dev)
{
int err;
- rtnl_lock();
+ if (rtnl_lock_killable())
+ return -EINTR;
err = register_netdevice(dev);
rtnl_unlock();
return err;
@@ -8048,7 +8122,6 @@ static void netdev_wait_allrefs(struct net_device *dev)
rcu_barrier();
rtnl_lock();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
@@ -8120,10 +8193,6 @@ void netdev_run_todo(void)
= list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list);
- rtnl_lock();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- __rtnl_unlock();
-
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
pr_err("network todo '%s' but state %d\n",
dev->name, dev->reg_state);
@@ -8141,8 +8210,9 @@ void netdev_run_todo(void)
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
WARN_ON(dev->dn_ptr);
-
+#endif
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
@@ -8564,7 +8634,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
new_nsid = peernet2id_alloc(dev_net(dev), net);
/* If there is an ifindex conflict assign a new one */
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index c0548d268e1a..e3e6a3e2ca22 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -57,8 +57,8 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
return -EINVAL;
list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- ha->type == addr_type) {
+ if (ha->type == addr_type &&
+ !memcmp(ha->addr, addr, addr_len)) {
if (global) {
/* check if addr is already used as global */
if (ha->global_use)
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 0ab1af04296c..a04e1e88bf3a 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
if (colon)
*colon = 0;
- dev_load(net, ifr->ifr_name);
-
/*
* See which interface the caller is talking about.
*/
@@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
+ dev_load(net, ifr->ifr_name);
rcu_read_lock();
ret = dev_ifsioc_locked(net, ifr, cmd);
rcu_read_unlock();
@@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
return ret;
case SIOCETHTOOL:
+ dev_load(net, ifr->ifr_name);
rtnl_lock();
ret = dev_ethtool(net, ifr);
rtnl_unlock();
@@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSIFNAME:
+ dev_load(net, ifr->ifr_name);
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
rtnl_lock();
@@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
+ dev_load(net, ifr->ifr_name);
rtnl_lock();
ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
@@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
cmd == SIOCGHWTSTAMP ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
+ dev_load(net, ifr->ifr_name);
rtnl_lock();
ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 18d385ed8237..ad1317376798 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1695,10 +1695,11 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
goto nla_put_failure;
if (table->resource_valid) {
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
- table->resource_id, DEVLINK_ATTR_PAD);
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
- table->resource_units, DEVLINK_ATTR_PAD);
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+ table->resource_id, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+ table->resource_units, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
}
if (devlink_dpipe_matches_put(table, skb))
goto nla_put_failure;
@@ -1797,7 +1798,7 @@ send_done:
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
@@ -1806,7 +1807,6 @@ send_done:
nla_put_failure:
err = -EMSGSIZE;
err_table_put:
-err_skb_send_alloc:
genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
@@ -2072,7 +2072,7 @@ static int devlink_dpipe_entries_fill(struct genl_info *info,
table->counters_enabled,
&dump_ctx);
if (err)
- goto err_entries_dump;
+ return err;
send_done:
nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
@@ -2080,16 +2080,10 @@ send_done:
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
return genlmsg_reply(dump_ctx.skb, info);
-
-err_entries_dump:
-err_skb_send_alloc:
- genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
- nlmsg_free(dump_ctx.skb);
- return err;
}
static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
@@ -2228,7 +2222,7 @@ send_done:
if (!nlh) {
err = devlink_dpipe_send_and_alloc_skb(&skb, info);
if (err)
- goto err_skb_send_alloc;
+ return err;
goto send_done;
}
return genlmsg_reply(skb, info);
@@ -2236,7 +2230,6 @@ send_done:
nla_put_failure:
err = -EMSGSIZE;
err_table_put:
-err_skb_send_alloc:
genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
@@ -2332,12 +2325,38 @@ devlink_resource_validate_children(struct devlink_resource *resource)
list_for_each_entry(child_resource, &resource->resource_list, list)
parts_size += child_resource->size_new;
- if (parts_size > resource->size)
+ if (parts_size > resource->size_new)
size_valid = false;
out:
resource->size_valid = size_valid;
}
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+ u64 reminder;
+ int err = 0;
+
+ if (size > resource->size_params.size_max) {
+ NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params.size_min) {
+ NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
+ if (reminder) {
+ NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
struct genl_info *info)
{
@@ -2356,12 +2375,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
if (!resource)
return -EINVAL;
- if (!resource->resource_ops->size_validate)
- return -EINVAL;
-
size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = resource->resource_ops->size_validate(devlink, size,
- info->extack);
+ err = devlink_resource_validate_size(resource, size, info->extack);
if (err)
return err;
@@ -2372,20 +2387,32 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
return 0;
}
-static void
+static int
devlink_resource_size_params_put(struct devlink_resource *resource,
struct sk_buff *skb)
{
struct devlink_resource_size_params *size_params;
- size_params = resource->size_params;
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
- size_params->size_granularity, DEVLINK_ATTR_PAD);
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
- size_params->size_max, DEVLINK_ATTR_PAD);
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
- size_params->size_min, DEVLINK_ATTR_PAD);
- nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit);
+ size_params = &resource->size_params;
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+ size_params->size_granularity, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+ size_params->size_max, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+ size_params->size_min, DEVLINK_ATTR_PAD) ||
+ nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_resource_occ_put(struct devlink_resource *resource,
+ struct sk_buff *skb)
+{
+ if (!resource->occ_get)
+ return 0;
+ return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+ resource->occ_get(resource->occ_get_priv),
+ DEVLINK_ATTR_PAD);
}
static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
@@ -2408,11 +2435,10 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
if (resource->size != resource->size_new)
nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
resource->size_new, DEVLINK_ATTR_PAD);
- if (resource->resource_ops && resource->resource_ops->occ_get)
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
- resource->resource_ops->occ_get(devlink),
- DEVLINK_ATTR_PAD);
- devlink_resource_size_params_put(resource, skb);
+ if (devlink_resource_occ_put(resource, skb))
+ goto nla_put_failure;
+ if (devlink_resource_size_params_put(resource, skb))
+ goto nla_put_failure;
if (list_empty(&resource->resource_list))
goto out;
@@ -2717,22 +2743,22 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.doit = devlink_nl_cmd_dpipe_table_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.doit = devlink_nl_cmd_dpipe_entries_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.doit = devlink_nl_cmd_dpipe_headers_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
@@ -2752,8 +2778,8 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_RELOAD,
@@ -3143,21 +3169,21 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
* @resource_id: resource's id
* @parent_reosurce_id: resource's parent id
* @size params: size parameters
- * @resource_ops: resource ops
*/
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
- struct devlink_resource_size_params *size_params,
- const struct devlink_resource_ops *resource_ops)
+ const struct devlink_resource_size_params *size_params)
{
struct devlink_resource *resource;
struct list_head *resource_list;
+ bool top_hierarchy;
int err = 0;
+ top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
mutex_lock(&devlink->lock);
resource = devlink_resource_find(devlink, NULL, resource_id);
if (resource) {
@@ -3192,9 +3218,9 @@ int devlink_resource_register(struct devlink *devlink,
resource->size = resource_size;
resource->size_new = resource_size;
resource->id = resource_id;
- resource->resource_ops = resource_ops;
resource->size_valid = true;
- resource->size_params = size_params;
+ memcpy(&resource->size_params, size_params,
+ sizeof(resource->size_params));
INIT_LIST_HEAD(&resource->resource_list);
list_add_tail(&resource->list, resource_list);
out:
@@ -3293,6 +3319,58 @@ out:
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
+/**
+ * devlink_resource_occ_get_register - register occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+void devlink_resource_occ_get_register(struct devlink *devlink,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ struct devlink_resource *resource;
+
+ mutex_lock(&devlink->lock);
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (WARN_ON(!resource))
+ goto out;
+ WARN_ON(resource->occ_get);
+
+ resource->occ_get = occ_get;
+ resource->occ_get_priv = occ_get_priv;
+out:
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+
+/**
+ * devlink_resource_occ_get_unregister - unregister occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ */
+void devlink_resource_occ_get_unregister(struct devlink *devlink,
+ u64 resource_id)
+{
+ struct devlink_resource *resource;
+
+ mutex_lock(&devlink->lock);
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (WARN_ON(!resource))
+ goto out;
+ WARN_ON(!resource->occ_get);
+
+ resource->occ_get = NULL;
+ resource->occ_get_priv = NULL;
+out:
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
+
static int __init devlink_module_init(void)
{
return genl_register_family(&devlink_nl_family);
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 554d36449231..64cef977484a 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -107,7 +107,7 @@ EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
#if IS_ENABLED(CONFIG_IPV6)
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr)
+ const struct in6_addr *saddr)
{
struct dst_cache_pcpu *idst;
@@ -117,7 +117,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
rt6_get_cookie((struct rt6_info *)dst));
- idst->in6_saddr = *addr;
+ idst->in6_saddr = *saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 494e6a5d7306..03416e6dd5d7 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -22,6 +22,7 @@
#include <linux/bitops.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/sfp.h>
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/sched/signal.h>
@@ -107,6 +108,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
[NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
+ [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
};
static const char
@@ -121,6 +123,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
[ETHTOOL_ID_UNSPEC] = "Unspec",
[ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
[ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
+ [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
};
static const char
@@ -1022,6 +1025,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
+ /* If FLOW_RSS was requested then user-space must be using the
+ * new definition, as FLOW_RSS is newer.
+ */
+ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
+ info_size = sizeof(info);
+ if (copy_from_user(&info, useraddr, info_size))
+ return -EFAULT;
+ }
+
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -1251,9 +1263,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->get_rxfh_context)
+ return -EOPNOTSUPP;
rxfh.indir_size = dev_indir_size;
rxfh.key_size = dev_key_size;
@@ -1276,7 +1290,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
hkey = rss_config + indir_bytes;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ if (rxfh.rss_context)
+ ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
+ &dev_hfunc,
+ rxfh.rss_context);
+ else
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
if (ret)
goto out;
@@ -1306,6 +1325,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
u8 *hkey = NULL;
u8 *rss_config;
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+ bool delete = false;
if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
@@ -1319,9 +1339,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->set_rxfh_context)
+ return -EOPNOTSUPP;
/* If either indir, hash key or function is valid, proceed further.
* Must request at least one change: indir size, hash key or function.
@@ -1346,7 +1368,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
- /* rxfh.indir_size == 0 means reset the indir table to default.
+ /* rxfh.indir_size == 0 means reset the indir table to default (master
+ * context) or delete the context (other RSS contexts).
* rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
*/
if (rxfh.indir_size &&
@@ -1359,9 +1382,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
} else if (rxfh.indir_size == 0) {
- indir = (u32 *)rss_config;
- for (i = 0; i < dev_indir_size; i++)
- indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ if (rxfh.rss_context == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ } else {
+ delete = true;
+ }
}
if (rxfh.key_size) {
@@ -1374,15 +1401,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ if (rxfh.rss_context)
+ ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
+ &rxfh.rss_context, delete);
+ else
+ ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
if (ret)
goto out;
- /* indicate whether rxfh was set to default */
- if (rxfh.indir_size == 0)
- dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
- else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
+ &rxfh.rss_context, sizeof(rxfh.rss_context)))
+ ret = -EFAULT;
+
+ if (!rxfh.rss_context) {
+ /* indicate whether rxfh was set to default */
+ if (rxfh.indir_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ }
out:
kfree(rss_config);
@@ -2210,6 +2247,9 @@ static int __ethtool_get_module_info(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->sfp_bus)
+ return sfp_get_module_info(dev->sfp_bus, modinfo);
+
if (phydev && phydev->drv && phydev->drv->module_info)
return phydev->drv->module_info(phydev, modinfo);
@@ -2244,6 +2284,9 @@ static int __ethtool_get_module_eeprom(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->sfp_bus)
+ return sfp_get_module_eeprom(dev->sfp_bus, ee, data);
+
if (phydev && phydev->drv && phydev->drv->module_eeprom)
return phydev->drv->module_eeprom(phydev, ee, data);
@@ -2277,6 +2320,11 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
tuna->type_id != ETHTOOL_TUNABLE_U32)
return -EINVAL;
break;
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ if (tuna->len != sizeof(u16) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U16)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
@@ -2520,11 +2568,14 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM };
+ int rc;
if (!dev->ethtool_ops->get_fecparam)
return -EOPNOTSUPP;
- dev->ethtool_ops->get_fecparam(dev, &fecparam);
+ rc = dev->ethtool_ops->get_fecparam(dev, &fecparam);
+ if (rc)
+ return rc;
if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
return -EFAULT;
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 0c048bdeb016..13a40b831d6d 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -13,16 +13,22 @@ int call_fib_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ int err;
+
info->net = net;
- return nb->notifier_call(nb, event_type, info);
+ err = nb->notifier_call(nb, event_type, info);
+ return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifier);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ int err;
+
info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
+ err = atomic_notifier_call_chain(&fib_chain, event_type, info);
+ return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifiers);
@@ -33,6 +39,7 @@ static unsigned int fib_seq_sum(void)
struct net *net;
rtnl_lock();
+ down_read(&net_rwsem);
for_each_net(net) {
rcu_read_lock();
list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
@@ -43,6 +50,7 @@ static unsigned int fib_seq_sum(void)
}
rcu_read_unlock();
}
+ up_read(&net_rwsem);
rtnl_unlock();
return fib_seq;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..33958f84c173 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
+ if (fib_rule_port_range_set(&rule->sport_range))
+ return false;
+ if (fib_rule_port_range_set(&rule->dport_range))
+ return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -51,6 +55,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+ r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
@@ -220,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}
+static int nla_get_port_range(struct nlattr *pattr,
+ struct fib_rule_port_range *port_range)
+{
+ const struct fib_rule_port_range *pr = nla_data(pattr);
+
+ if (!fib_rule_port_range_valid(pr))
+ return -EINVAL;
+
+ port_range->start = pr->start;
+ port_range->end = pr->end;
+
+ return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+ struct fib_rule_port_range *range)
+{
+ return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -424,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
!uid_eq(r->uid_range.end, rule->uid_range.end))
continue;
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -469,6 +505,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
: fib_default_rule_pref(ops);
+ rule->proto = tb[FRA_PROTOCOL] ?
+ nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -565,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->uid_range = fib_kuid_range_unset;
}
+ if (tb[FRA_IP_PROTO])
+ rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &rule->sport_range);
+ if (err)
+ goto errout_free;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &rule->dport_range);
+ if (err)
+ goto errout_free;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -575,6 +631,11 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout_free;
+ err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops,
+ extack);
+ if (err < 0)
+ goto errout_free;
+
list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref > rule->pref)
break;
@@ -611,7 +672,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -630,6 +690,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rule_port_range sprange = {0, 0};
+ struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
@@ -663,7 +725,25 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
range = fib_kuid_range_unset;
}
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &sprange);
+ if (err)
+ goto errout;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &dprange);
+ if (err)
+ goto errout;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
+ if (tb[FRA_PROTOCOL] &&
+ (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
+ continue;
+
if (frh->action && (frh->action != rule->action))
continue;
@@ -704,6 +784,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
!uid_eq(rule->uid_range.end, range.end)))
continue;
+ if (tb[FRA_IP_PROTO] &&
+ (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+ continue;
+
+ if (fib_rule_port_range_set(&sprange) &&
+ !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+ continue;
+
+ if (fib_rule_port_range_set(&dprange) &&
+ !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -781,7 +873,11 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4) /* FRA_FWMASK */
+ nla_total_size_64bit(8) /* FRA_TUN_ID */
- + nla_total_size(sizeof(struct fib_kuid_range));
+ + nla_total_size(sizeof(struct fib_kuid_range))
+ + nla_total_size(1) /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_IP_PROTO */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -812,6 +908,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
+ if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
+ goto nla_put_failure;
+
if (rule->action == FR_ACT_GOTO &&
rcu_access_pointer(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
@@ -843,7 +942,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
(uid_range_set(&rule->uid_range) &&
- nla_put_uid_range(skb, &rule->uid_range)))
+ nla_put_uid_range(skb, &rule->uid_range)) ||
+ (fib_rule_port_range_set(&rule->sport_range) &&
+ nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (fib_rule_port_range_set(&rule->dport_range) &&
+ nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 0c121adbdbaa..d31aff93270d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -33,6 +33,7 @@
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
+#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
@@ -1855,7 +1856,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
/* If user passes invalid input drop the packet. */
- if (unlikely(flags))
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
tcb->bpf.key = key;
@@ -1890,6 +1891,202 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+ struct bpf_map *, map, u32, key, u64, flags)
+{
+ /* If user passes invalid input drop the packet. */
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
+ return SK_DROP;
+
+ msg->key = key;
+ msg->flags = flags;
+ msg->map = map;
+
+ return SK_PASS;
+}
+
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+ struct sock *sk = NULL;
+
+ if (msg->map) {
+ sk = __sock_map_lookup_elem(msg->map, msg->key);
+
+ msg->key = 0;
+ msg->map = NULL;
+ }
+
+ return sk;
+}
+
+static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+ .func = bpf_msg_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->apply_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
+ .func = bpf_msg_apply_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->cork_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+ .func = bpf_msg_cork_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_pull_data,
+ struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+ unsigned int len = 0, offset = 0, copy = 0;
+ struct scatterlist *sg = msg->sg_data;
+ int first_sg, last_sg, i, shift;
+ unsigned char *p, *to, *from;
+ int bytes = end - start;
+ struct page *page;
+
+ if (unlikely(flags || end <= start))
+ return -EINVAL;
+
+ /* First find the starting scatterlist element */
+ i = msg->sg_start;
+ do {
+ len = sg[i].length;
+ offset += len;
+ if (start < offset + len)
+ break;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != msg->sg_end);
+
+ if (unlikely(start >= offset + len))
+ return -EINVAL;
+
+ if (!msg->sg_copy[i] && bytes <= len)
+ goto out;
+
+ first_sg = i;
+
+ /* At this point we need to linearize multiple scatterlist
+ * elements or a single shared page. Either way we need to
+ * copy into a linear buffer exclusively owned by BPF. Then
+ * place the buffer in the scatterlist and fixup the original
+ * entries by removing the entries now in the linear buffer
+ * and shifting the remaining entries. For now we do not try
+ * to copy partial entries to avoid complexity of running out
+ * of sg_entry slots. The downside is reading a single byte
+ * will copy the entire sg entry.
+ */
+ do {
+ copy += sg[i].length;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ if (bytes < copy)
+ break;
+ } while (i != msg->sg_end);
+ last_sg = i;
+
+ if (unlikely(copy < end - start))
+ return -EINVAL;
+
+ page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
+ if (unlikely(!page))
+ return -ENOMEM;
+ p = page_address(page);
+ offset = 0;
+
+ i = first_sg;
+ do {
+ from = sg_virt(&sg[i]);
+ len = sg[i].length;
+ to = p + offset;
+
+ memcpy(to, from, len);
+ offset += len;
+ sg[i].length = 0;
+ put_page(sg_page(&sg[i]));
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != last_sg);
+
+ sg[first_sg].length = copy;
+ sg_set_page(&sg[first_sg], page, copy, 0);
+
+ /* To repair sg ring we need to shift entries. If we only
+ * had a single entry though we can just replace it and
+ * be done. Otherwise walk the ring and shift the entries.
+ */
+ shift = last_sg - first_sg - 1;
+ if (!shift)
+ goto out;
+
+ i = first_sg + 1;
+ do {
+ int move_from;
+
+ if (i + shift >= MAX_SKB_FRAGS)
+ move_from = i + shift - MAX_SKB_FRAGS;
+ else
+ move_from = i + shift;
+
+ if (move_from == msg->sg_end)
+ break;
+
+ sg[i] = sg[move_from];
+ sg[move_from].length = 0;
+ sg[move_from].page_link = 0;
+ sg[move_from].offset = 0;
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (1);
+ msg->sg_end -= shift;
+ if (msg->sg_end < 0)
+ msg->sg_end += MAX_SKB_FRAGS;
+out:
+ msg->data = sg_virt(&sg[i]) + start - offset;
+ msg->data_end = msg->data + bytes;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+ .func = bpf_msg_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -2087,6 +2284,10 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
@@ -2096,19 +2297,21 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* SKB_GSO_TCPV4 needs to be changed into
* SKB_GSO_TCPV6.
*/
- if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
- skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ if (shinfo->gso_type & SKB_GSO_TCPV4) {
+ shinfo->gso_type &= ~SKB_GSO_TCPV4;
+ shinfo->gso_type |= SKB_GSO_TCPV6;
}
/* Due to IPv6 header, MSS needs to be downgraded. */
- skb_shinfo(skb)->gso_size -= len_diff;
+ skb_decrease_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IPV6);
@@ -2123,6 +2326,10 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
@@ -2132,19 +2339,21 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* SKB_GSO_TCPV6 needs to be changed into
* SKB_GSO_TCPV4.
*/
- if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
- skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ if (shinfo->gso_type & SKB_GSO_TCPV6) {
+ shinfo->gso_type &= ~SKB_GSO_TCPV6;
+ shinfo->gso_type |= SKB_GSO_TCPV4;
}
/* Due to IPv4 header, MSS can be upgraded. */
- skb_shinfo(skb)->gso_size += len_diff;
+ skb_increase_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IP);
@@ -2243,6 +2452,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
@@ -2252,11 +2465,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* Due to header grow, MSS needs to be downgraded. */
- skb_shinfo(skb)->gso_size -= len_diff;
+ skb_decrease_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
return 0;
@@ -2267,6 +2482,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
+ /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+ if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ return -ENOTSUPP;
+
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
@@ -2276,11 +2495,13 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
return ret;
if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* Due to header shrink, MSS can be upgraded. */
- skb_shinfo(skb)->gso_size += len_diff;
+ skb_increase_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
+ shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_segs = 0;
}
return 0;
@@ -2831,7 +3052,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
- func == bpf_xdp_adjust_meta)
+ func == bpf_xdp_adjust_meta ||
+ func == bpf_msg_pull_data)
return true;
return false;
@@ -2991,7 +3213,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
@@ -3025,6 +3247,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
if (flags & BPF_F_ZERO_CSUM_TX)
info->key.tun_flags &= ~TUNNEL_CSUM;
+ if (flags & BPF_F_SEQ_NUMBER)
+ info->key.tun_flags |= TUNNEL_SEQ;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -3239,6 +3463,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
#ifdef CONFIG_INET
+ } else if (level == SOL_IP) {
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+ return -EINVAL;
+
+ val = *((int *)optval);
+ /* Only some options are supported */
+ switch (optname) {
+ case IP_TOS:
+ if (val < -1 || val > 0xff) {
+ ret = -EINVAL;
+ } else {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (val == -1)
+ val = 0;
+ inet->tos = val;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
#if IS_ENABLED(CONFIG_IPV6)
} else if (level == SOL_IPV6) {
if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
@@ -3338,6 +3583,20 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
} else {
goto err_clear;
}
+ } else if (level == SOL_IP) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+ goto err_clear;
+
+ /* Only some options are supported */
+ switch (optname) {
+ case IP_TOS:
+ *((int *)optval) = (int)inet->tos;
+ break;
+ default:
+ goto err_clear;
+ }
#if IS_ENABLED(CONFIG_IPV6)
} else if (level == SOL_IPV6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -3398,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
.arg2_type = ARG_ANYTHING,
};
+const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
+
+BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
+ int, addr_len)
+{
+#ifdef CONFIG_INET
+ struct sock *sk = ctx->sk;
+ int err;
+
+ /* Binding to port can be expensive so it's prohibited in the helper.
+ * Only binding to IP is supported.
+ */
+ err = -EINVAL;
+ if (addr->sa_family == AF_INET) {
+ if (addr_len < sizeof(struct sockaddr_in))
+ return err;
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ return err;
+ return __inet_bind(sk, addr, addr_len, true, false);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (addr->sa_family == AF_INET6) {
+ if (addr_len < SIN6_LEN_RFC2133)
+ return err;
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ return err;
+ /* ipv6_bpf_stub cannot be NULL, since it's called from
+ * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
+ */
+ return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
+#endif /* CONFIG_IPV6 */
+ }
+#endif /* CONFIG_INET */
+
+ return -EAFNOSUPPORT;
+}
+
+static const struct bpf_func_proto bpf_bind_proto = {
+ .func = bpf_bind,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3427,7 +3732,21 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-sock_filter_func_proto(enum bpf_func_id func_id)
+sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ /* inet and inet6 sockets are created in a process
+ * context so there is always a valid uid/gid
+ */
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
/* inet and inet6 sockets are created in a process
@@ -3435,13 +3754,21 @@ sock_filter_func_proto(enum bpf_func_id func_id)
*/
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_bind:
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ return &bpf_bind_proto;
+ default:
+ return NULL;
+ }
default:
return bpf_base_func_proto(func_id);
}
}
static const struct bpf_func_proto *
-sk_filter_func_proto(enum bpf_func_id func_id)
+sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
@@ -3456,7 +3783,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-tc_cls_act_func_proto(enum bpf_func_id func_id)
+tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
@@ -3523,7 +3850,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-xdp_func_proto(enum bpf_func_id func_id)
+xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -3546,7 +3873,7 @@ xdp_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-lwt_inout_func_proto(enum bpf_func_id func_id)
+lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
@@ -3573,7 +3900,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
- sock_ops_func_proto(enum bpf_func_id func_id)
+sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_setsockopt:
@@ -3589,7 +3916,25 @@ static const struct bpf_func_proto *
}
}
-static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_msg_redirect_map:
+ return &bpf_msg_redirect_map_proto;
+ case BPF_FUNC_msg_apply_bytes:
+ return &bpf_msg_apply_bytes_proto;
+ case BPF_FUNC_msg_cork_bytes:
+ return &bpf_msg_cork_bytes_proto;
+ case BPF_FUNC_msg_pull_data:
+ return &bpf_msg_pull_data_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
@@ -3614,7 +3959,7 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-lwt_xmit_func_proto(enum bpf_func_id func_id)
+lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_get_tunnel_key:
@@ -3644,11 +3989,12 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
default:
- return lwt_inout_func_proto(func_id);
+ return lwt_inout_func_proto(func_id, prog);
}
}
static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
@@ -3692,6 +4038,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3712,11 +4059,12 @@ static bool sk_filter_is_valid_access(int off, int size,
}
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3746,32 +4094,83 @@ static bool lwt_is_valid_access(int off, int size,
break;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
-static bool sock_filter_is_valid_access(int off, int size,
- enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
+
+/* Attach type specific accesses */
+static bool __sock_filter_check_attach_type(int off,
+ enum bpf_access_type access_type,
+ enum bpf_attach_type attach_type)
{
- if (type == BPF_WRITE) {
- switch (off) {
- case offsetof(struct bpf_sock, bound_dev_if):
- case offsetof(struct bpf_sock, mark):
- case offsetof(struct bpf_sock, priority):
- break;
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ case offsetof(struct bpf_sock, mark):
+ case offsetof(struct bpf_sock, priority):
+ switch (attach_type) {
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ goto full_access;
+ default:
+ return false;
+ }
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ switch (attach_type) {
+ case BPF_CGROUP_INET4_POST_BIND:
+ goto read_only;
+ default:
+ return false;
+ }
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ switch (attach_type) {
+ case BPF_CGROUP_INET6_POST_BIND:
+ goto read_only;
+ default:
+ return false;
+ }
+ case bpf_ctx_range(struct bpf_sock, src_port):
+ switch (attach_type) {
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
+ goto read_only;
default:
return false;
}
}
+read_only:
+ return access_type == BPF_READ;
+full_access:
+ return true;
+}
- if (off < 0 || off + size > sizeof(struct bpf_sock))
+static bool __sock_filter_check_size(int off, int size,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size, size_default);
+ }
+
+ return size == size_default;
+}
+
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
- /* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
+ if (!__sock_filter_check_attach_type(off, type,
+ prog->expected_attach_type))
+ return false;
+ if (!__sock_filter_check_size(off, size, info))
return false;
-
return true;
}
@@ -3822,6 +4221,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE) {
@@ -3851,7 +4251,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
return false;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool __is_valid_xdp_access(int off, int size)
@@ -3868,6 +4268,7 @@ static bool __is_valid_xdp_access(int off, int size)
static bool xdp_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE)
@@ -3898,8 +4299,74 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+static bool sock_addr_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ if (off < 0 || off >= sizeof(struct bpf_sock_addr))
+ return false;
+ if (off % size != 0)
+ return false;
+
+ /* Disallow access to IPv6 fields from IPv4 contex and vise
+ * versa.
+ */
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET4_CONNECT:
+ break;
+ default:
+ return false;
+ }
+ break;
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET6_BIND:
+ case BPF_CGROUP_INET6_CONNECT:
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ /* Only narrow read access allowed for now. */
+ if (type == BPF_READ) {
+ bpf_ctx_record_field_size(info, size_default);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_default))
+ return false;
+ } else {
+ if (size != size_default)
+ return false;
+ }
+ break;
+ case bpf_ctx_range(struct bpf_sock_addr, user_port):
+ if (size != size_default)
+ return false;
+ break;
+ default:
+ if (type == BPF_READ) {
+ if (size != size_default)
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
+
static bool sock_ops_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
@@ -3946,6 +4413,7 @@ static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool sk_skb_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3975,7 +4443,34 @@ static bool sk_skb_is_valid_access(int off, int size,
break;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
+static bool sk_msg_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (type == BPF_WRITE)
+ return false;
+
+ switch (off) {
+ case offsetof(struct sk_msg_md, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ if (off < 0 || off >= sizeof(struct sk_msg_md))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u64))
+ return false;
+
+ return true;
}
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
@@ -4283,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
+ int off;
switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
@@ -4338,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
break;
+
+ case offsetof(struct bpf_sock, src_ip4):
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_rcv_saddr,
+ FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr),
+ target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ off = si->off;
+ off -= offsetof(struct bpf_sock, src_ip6[0]);
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(
+ struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0],
+ FIELD_SIZEOF(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]),
+ target_size) + off);
+#else
+ (void)off;
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct bpf_sock, src_port):
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock_common, skc_num),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_num,
+ FIELD_SIZEOF(struct sock_common,
+ skc_num),
+ target_size));
+ break;
}
return insn - insn_buf;
@@ -4413,6 +4946,152 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
+ * context Structure, F is Field in context structure that contains a pointer
+ * to Nested Structure of type NS that has the field NF.
+ *
+ * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make
+ * sure that SIZE is not greater than actual size of S.F.NF.
+ *
+ * If offset OFF is provided, the load happens from that offset relative to
+ * offset of NF.
+ */
+#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
+ do { \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
+ si->src_reg, offsetof(S, F)); \
+ *insn++ = BPF_LDX_MEM( \
+ SIZE, si->dst_reg, si->dst_reg, \
+ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ target_size) \
+ + OFF); \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
+ BPF_FIELD_SIZEOF(NS, NF), 0)
+
+/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
+ * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
+ *
+ * It doesn't support SIZE argument though since narrow stores are not
+ * supported for now.
+ *
+ * In addition it uses Temporary Field TF (member of struct S) as the 3rd
+ * "register" since two registers available in convert_ctx_access are not
+ * enough: we can't override neither SRC, since it contains value to store, nor
+ * DST since it contains pointer to context that may be used by later
+ * instructions. But we need a temporary place to save pointer to nested
+ * structure whose field we want to store to.
+ */
+#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
+ do { \
+ int tmp_reg = BPF_REG_9; \
+ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
+ --tmp_reg; \
+ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
+ --tmp_reg; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
+ offsetof(S, TF)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
+ si->dst_reg, offsetof(S, F)); \
+ *insn++ = BPF_STX_MEM( \
+ BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
+ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ target_size) \
+ + OFF); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
+ offsetof(S, TF)); \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
+ TF) \
+ do { \
+ if (type == BPF_WRITE) { \
+ SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
+ TF); \
+ } else { \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
+ S, NS, F, NF, SIZE, OFF); \
+ } \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
+ S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
+
+static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sock_addr, user_family):
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sockaddr, uaddr, sa_family);
+ break;
+
+ case offsetof(struct bpf_sock_addr, user_ip4):
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
+ sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ off = si->off;
+ off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
+ sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
+ tmp_reg);
+ break;
+
+ case offsetof(struct bpf_sock_addr, user_port):
+ /* To get port we need to know sa_family first and then treat
+ * sockaddr as either sockaddr_in or sockaddr_in6.
+ * Though we can simplify since port field has same offset and
+ * size in both structures.
+ * Here we check this invariant and use just one of the
+ * structures if it's true.
+ */
+ BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
+ offsetof(struct sockaddr_in6, sin6_port));
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
+ FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sockaddr_in6, uaddr,
+ sin6_port, tmp_reg);
+ break;
+
+ case offsetof(struct bpf_sock_addr, family):
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_family);
+ break;
+
+ case offsetof(struct bpf_sock_addr, type):
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sock, sk,
+ __sk_flags_offset, BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ break;
+
+ case offsetof(struct bpf_sock_addr, protocol):
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sock, sk,
+ __sk_flags_offset, BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+ SK_FL_PROTO_SHIFT);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -4776,6 +5455,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct sk_msg_md, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data));
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data_end));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
@@ -4847,6 +5549,15 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = {
const struct bpf_prog_ops cg_sock_prog_ops = {
};
+const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
+ .get_func_proto = sock_addr_func_proto,
+ .is_valid_access = sock_addr_is_valid_access,
+ .convert_ctx_access = sock_addr_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_sock_addr_prog_ops = {
+};
+
const struct bpf_verifier_ops sock_ops_verifier_ops = {
.get_func_proto = sock_ops_func_proto,
.is_valid_access = sock_ops_is_valid_access,
@@ -4866,6 +5577,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = {
const struct bpf_prog_ops sk_skb_prog_ops = {
};
+const struct bpf_verifier_ops sk_msg_verifier_ops = {
+ .get_func_proto = sk_msg_func_proto,
+ .is_valid_access = sk_msg_is_valid_access,
+ .convert_ctx_access = sk_msg_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_msg_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 559db9ea8d86..d29f09bc5ff9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
}
EXPORT_SYMBOL(__get_hash_from_flowi6);
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
- memset(keys, 0, sizeof(*keys));
-
- keys->addrs.v4addrs.src = fl4->saddr;
- keys->addrs.v4addrs.dst = fl4->daddr;
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys->ports.src = fl4->fl4_sport;
- keys->ports.dst = fl4->fl4_dport;
- keys->keyid.keyid = fl4->fl4_gre_key;
- keys->basic.ip_proto = fl4->flowi4_proto;
-
- return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index e010bb800d7b..9737302907b1 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -315,12 +315,12 @@ static int __net_init dev_proc_net_init(struct net *net)
{
int rc = -ENOMEM;
- if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
+ if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops))
goto out;
- if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
+ if (!proc_create("softnet_stat", 0444, net->proc_net,
&softnet_seq_fops))
goto out_dev;
- if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
+ if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops))
goto out_softnet;
if (wext_proc_init(net))
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 60a5ad2c33ee..c476f0794132 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -431,7 +431,7 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr,
return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
-static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);
static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
@@ -854,10 +854,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
}
static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
- = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+ = __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);
static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
- = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+ = __ATTR(rps_flow_cnt, 0644,
show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */
@@ -1154,7 +1154,7 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
- = __ATTR(hold_time, S_IRUGO | S_IWUSR,
+ = __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
static ssize_t bql_show_inflight(struct netdev_queue *queue,
@@ -1166,7 +1166,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
- __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
+ __ATTR(inflight, 0444, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
@@ -1182,7 +1182,7 @@ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
} \
\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
- = __ATTR(NAME, S_IRUGO | S_IWUSR, \
+ = __ATTR(NAME, 0644, \
bql_show_ ## NAME, bql_set_ ## NAME)
BQL_ATTR(limit, limit);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3cad5f51afd3..a11e03f920d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,11 +29,14 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
+/* Protects net_namespace_list. Nests iside rtnl_lock() */
+DECLARE_RWSEM(net_rwsem);
+EXPORT_SYMBOL_GPL(net_rwsem);
+
struct net init_net = {
.count = REFCOUNT_INIT(1),
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
@@ -41,6 +44,14 @@ struct net init_net = {
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
+/*
+ * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
+ * init_net_initialized and first_device pointer.
+ * This is internal net namespace object. Please, don't use it
+ * outside.
+ */
+DECLARE_RWSEM(pernet_ops_rwsem);
+EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -65,11 +76,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
struct net_generic *ng, *old_ng;
- BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id < MIN_PERNET_OPS_ID);
old_ng = rcu_dereference_protected(net->gen,
- lockdep_is_held(&net_mutex));
+ lockdep_is_held(&pernet_ops_rwsem));
if (old_ng->s.len > id) {
old_ng->ptr[id] = data;
return 0;
@@ -286,7 +296,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
*/
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
- /* Must be called with net_mutex held */
+ /* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
@@ -297,12 +307,16 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->user_ns = user_ns;
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
+ mutex_init(&net->ipv4.ra_mutex);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
if (error < 0)
goto out_undo;
}
+ down_write(&net_rwsem);
+ list_add_tail_rcu(&net->list, &net_namespace_list);
+ up_write(&net_rwsem);
out:
return error;
@@ -354,7 +368,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;
static struct net *net_alloc(void)
@@ -408,32 +422,27 @@ struct net *copy_net_ns(unsigned long flags,
net = net_alloc();
if (!net) {
- dec_net_namespaces(ucounts);
- return ERR_PTR(-ENOMEM);
+ rv = -ENOMEM;
+ goto dec_ucounts;
}
-
+ refcount_set(&net->passive, 1);
+ net->ucounts = ucounts;
get_user_ns(user_ns);
- rv = mutex_lock_killable(&net_mutex);
- if (rv < 0) {
- net_free(net);
- dec_net_namespaces(ucounts);
- put_user_ns(user_ns);
- return ERR_PTR(rv);
- }
+ rv = down_read_killable(&pernet_ops_rwsem);
+ if (rv < 0)
+ goto put_userns;
- net->ucounts = ucounts;
rv = setup_net(net, user_ns);
- if (rv == 0) {
- rtnl_lock();
- list_add_tail_rcu(&net->list, &net_namespace_list);
- rtnl_unlock();
- }
- mutex_unlock(&net_mutex);
+
+ up_read(&pernet_ops_rwsem);
+
if (rv < 0) {
- dec_net_namespaces(ucounts);
+put_userns:
put_user_ns(user_ns);
net_drop_ns(net);
+dec_ucounts:
+ dec_net_namespaces(ucounts);
return ERR_PTR(rv);
}
return net;
@@ -446,7 +455,7 @@ static void unhash_nsid(struct net *net, struct net *last)
* and this work is the only process, that may delete
* a net from net_namespace_list. So, when the below
* is executing, the list may only grow. Thus, we do not
- * use for_each_net_rcu() or rtnl_lock().
+ * use for_each_net_rcu() or net_rwsem.
*/
for_each_net(tmp) {
int id;
@@ -466,26 +475,23 @@ static void unhash_nsid(struct net *net, struct net *last)
spin_unlock_bh(&net->nsid_lock);
}
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
struct net *net, *tmp, *last;
- struct list_head net_kill_list;
+ struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
/* Atomically snapshot the list of namespaces to cleanup */
- spin_lock_irq(&cleanup_list_lock);
- list_replace_init(&cleanup_list, &net_kill_list);
- spin_unlock_irq(&cleanup_list_lock);
+ net_kill_list = llist_del_all(&cleanup_list);
- mutex_lock(&net_mutex);
+ down_read(&pernet_ops_rwsem);
/* Don't let anyone else find us. */
- rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list)
+ down_write(&net_rwsem);
+ llist_for_each_entry(net, net_kill_list, cleanup_list)
list_del_rcu(&net->list);
/* Cache last net. After we unlock rtnl, no one new net
* added to net_namespace_list can assign nsid pointer
@@ -498,9 +504,9 @@ static void cleanup_net(struct work_struct *work)
* useless anyway, as netns_ids are destroyed there.
*/
last = list_last_entry(&net_namespace_list, struct net, list);
- rtnl_unlock();
+ up_write(&net_rwsem);
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ llist_for_each_entry(net, net_kill_list, cleanup_list) {
unhash_nsid(net, last);
list_add_tail(&net->exit_list, &net_exit_list);
}
@@ -520,7 +526,7 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list);
- mutex_unlock(&net_mutex);
+ up_read(&pernet_ops_rwsem);
/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
@@ -547,8 +553,8 @@ static void cleanup_net(struct work_struct *work)
*/
void net_ns_barrier(void)
{
- mutex_lock(&net_mutex);
- mutex_unlock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);
@@ -557,13 +563,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
void __put_net(struct net *net)
{
/* Cleanup the network namespace in process context */
- unsigned long flags;
-
- spin_lock_irqsave(&cleanup_list_lock, flags);
- list_add(&net->cleanup_list, &cleanup_list);
- spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
- queue_work(netns_wq, &net_cleanup_work);
+ if (llist_add(&net->cleanup_list, &cleanup_list))
+ queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
@@ -861,7 +862,7 @@ static int __init net_ns_init(void)
#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
- SLAB_PANIC, NULL);
+ SLAB_PANIC|SLAB_ACCOUNT, NULL);
/* Create workqueue for cleanup */
netns_wq = create_singlethread_workqueue("netns");
@@ -875,17 +876,12 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
-
- rtnl_lock();
- list_add_tail_rcu(&init_net.list, &net_namespace_list);
- rtnl_unlock();
-
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
register_pernet_subsys(&net_ns_ops);
@@ -909,6 +905,9 @@ static int __register_pernet_operations(struct list_head *list,
list_add_tail(&ops->list, list);
if (ops->init || (ops->id && ops->size)) {
+ /* We held write locked pernet_ops_rwsem, and parallel
+ * setup_net() and cleanup_net() are not possible.
+ */
for_each_net(net) {
error = ops_init(ops, net);
if (error)
@@ -932,6 +931,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
LIST_HEAD(net_exit_list);
list_del(&ops->list);
+ /* See comment in __register_pernet_operations() */
for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list);
ops_exit_list(ops, &net_exit_list);
@@ -996,7 +996,6 @@ again:
static void unregister_pernet_operations(struct pernet_operations *ops)
{
-
__unregister_pernet_operations(ops);
rcu_barrier();
if (ops->id)
@@ -1025,9 +1024,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
error = register_pernet_operations(first_device, ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1043,9 +1042,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
*/
void unregister_pernet_subsys(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -1071,11 +1070,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
int register_pernet_device(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
error = register_pernet_operations(&pernet_list, ops);
if (!error && (first_device == &pernet_list))
first_device = &ops->list;
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1091,11 +1090,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b8ab5c829511..7e4ede34cc52 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- size_t copy = min_t(size_t, count, 1023);
- char tb[copy + 1];
- if (copy_from_user(tb, user_buffer, copy))
- return -EFAULT;
- tb[copy] = 0;
- pr_debug("%s,%lu buffer -:%s:-\n",
- name, (unsigned long)count, tb);
+ size_t copy = min_t(size_t, count + 1, 1024);
+ char *tp = strndup_user(user_buffer, copy);
+
+ if (IS_ERR(tp))
+ return PTR_ERR(tp);
+
+ pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp);
+ kfree(tp);
}
if (!strcmp(name, "min_pkt_size")) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc290413a49d..45936922d7e2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -75,6 +75,12 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_killable(void)
+{
+ return mutex_lock_killable(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_killable);
+
static struct sk_buff *defer_kfree_skb_list;
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
@@ -406,7 +412,9 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
* __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
*
- * The caller must hold the rtnl_mutex.
+ * The caller must hold the rtnl_mutex and guarantee net_namespace_list
+ * integrity (hold pernet_ops_rwsem for writing to close the race
+ * with setup_net() and cleanup_net()).
*/
void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
@@ -432,6 +440,9 @@ static void rtnl_lock_unregistering_all(void)
for (;;) {
unregistering = false;
rtnl_lock();
+ /* We held write locked pernet_ops_rwsem, and parallel
+ * setup_net() and cleanup_net() are not possible.
+ */
for_each_net(net) {
if (net->dev_unreg_count > 0) {
unregistering = true;
@@ -453,12 +464,12 @@ static void rtnl_lock_unregistering_all(void)
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
- /* Close the race with cleanup_net() */
- mutex_lock(&net_mutex);
+ /* Close the race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 09bd89c90a71..345b51837ca8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -77,8 +77,8 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -857,6 +857,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
n->cloned = 1;
n->nohdr = 0;
+ n->peeked = 0;
n->destructor = NULL;
C(tail);
C(end);
@@ -890,7 +891,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
unsigned long max_pg, num_pg, new_pg, old_pg;
struct user_struct *user;
@@ -919,14 +920,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
return 0;
}
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
{
if (mmp->user) {
atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
free_uid(mmp->user);
}
}
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
{
@@ -3458,6 +3461,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
+{
+ skb_frag_t head_frag;
+ struct page *page;
+
+ page = virt_to_head_page(frag_skb->head);
+ head_frag.page.p = page;
+ head_frag.page_offset = frag_skb->data -
+ (unsigned char *)page_address(page);
+ head_frag.size = skb_headlen(frag_skb);
+ return head_frag;
+}
+
/**
* skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment
@@ -3662,15 +3678,19 @@ normal:
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(list_skb));
-
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
+ if (!skb_headlen(list_skb)) {
+ BUG_ON(!nfrags);
+ } else {
+ BUG_ON(!list_skb->head_frag);
- BUG_ON(!nfrags);
-
+ /* to make room for head_frag. */
+ i--;
+ frag--;
+ }
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
skb_zerocopy_clone(nskb, frag_skb,
GFP_ATOMIC))
@@ -3687,7 +3707,7 @@ normal:
goto err;
}
- *nskb_frag = *frag;
+ *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
__skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
@@ -4179,7 +4199,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ sk->sk_error_report(sk);
return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);
@@ -4891,7 +4911,7 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet);
*
* The MAC/L2 or network (IP, IPv6) headers are not accounted for.
*/
-unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
unsigned int thlen = 0;
@@ -4904,7 +4924,7 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
- } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
+ } else if (unlikely(skb_is_gso_sctp(skb))) {
thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
@@ -4913,7 +4933,40 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
*/
return thlen + shinfo->gso_size;
}
-EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+
+/**
+ * skb_gso_network_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_network_seglen is used to determine the real size of the
+ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
+ *
+ * The MAC/L2 header is not accounted for.
+ */
+static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) -
+ skb_network_header(skb);
+
+ return hdr_len + skb_gso_transport_seglen(skb);
+}
+
+/**
+ * skb_gso_mac_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_mac_seglen is used to determine the real size of the
+ * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
+ * headers (TCP/UDP).
+ */
+static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+ return hdr_len + skb_gso_transport_seglen(skb);
+}
/**
* skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
@@ -4955,19 +5008,20 @@ static inline bool skb_gso_size_check(const struct sk_buff *skb,
}
/**
- * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
*
* @skb: GSO skb
* @mtu: MTU to validate against
*
- * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
- * once split.
+ * skb_gso_validate_network_len validates if a given skb will fit a
+ * wanted MTU once split. It considers L3 headers, L4 headers, and the
+ * payload.
*/
-bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
+bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
{
return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
}
-EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
+EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
/**
* skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
@@ -4986,13 +5040,18 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
+ int mac_len;
+
if (skb_cow(skb, skb_headroom(skb)) < 0) {
kfree_skb(skb);
return NULL;
}
- memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
- 2 * ETH_ALEN);
+ mac_len = skb->data - skb_mac_header(skb);
+ if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
+ memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+ mac_len - VLAN_HLEN - ETH_TLEN);
+ }
skb->mac_header += VLAN_HLEN;
return skb;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index c501499a04fe..6444525f610c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1049,16 +1049,18 @@ set_rcvbuf:
break;
case SO_ZEROCOPY:
- if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ } else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
- else if (sk->sk_protocol != IPPROTO_TCP)
- ret = -ENOTSUPP;
- else if (sk->sk_state != TCP_CLOSE)
- ret = -EBUSY;
- else if (val < 0 || val > 1)
- ret = -EINVAL;
- else
- sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
+ if (!ret) {
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
break;
default:
@@ -1274,7 +1276,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
char address[128];
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
@@ -1773,7 +1776,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
u32 max_segs = 1;
sk_dst_set(sk, dst);
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
@@ -2234,6 +2237,67 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
EXPORT_SYMBOL(sk_page_frag_refill);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
+ int first_coalesce)
+{
+ int sg_curr = *sg_curr_index, use = 0, rc = 0;
+ unsigned int size = *sg_curr_size;
+ struct page_frag *pfrag;
+ struct scatterlist *sge;
+
+ len -= size;
+ pfrag = sk_page_frag(sk);
+
+ while (len > 0) {
+ unsigned int orig_offset;
+
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ use = min_t(int, len, pfrag->size - pfrag->offset);
+
+ if (!sk_wmem_schedule(sk, use)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sk_mem_charge(sk, use);
+ size += use;
+ orig_offset = pfrag->offset;
+ pfrag->offset += use;
+
+ sge = sg + sg_curr - 1;
+ if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
+ sg->offset + sg->length == orig_offset) {
+ sg->length += use;
+ } else {
+ sge = sg + sg_curr;
+ sg_unmark_end(sge);
+ sg_set_page(sge, pfrag->page, use, orig_offset);
+ get_page(pfrag->page);
+ sg_curr++;
+
+ if (sg_curr == MAX_SKB_FRAGS)
+ sg_curr = 0;
+
+ if (sg_curr == sg_start) {
+ rc = -ENOSPC;
+ break;
+ }
+ }
+
+ len -= use;
+ }
+out:
+ *sg_curr_size = size;
+ *sg_curr_index = sg_curr;
+ return rc;
+}
+EXPORT_SYMBOL(sk_alloc_sg);
+
static void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
@@ -2497,7 +2561,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
return -EOPNOTSUPP;
}
@@ -3261,6 +3325,27 @@ void proto_unregister(struct proto *prot)
}
EXPORT_SYMBOL(proto_unregister);
+int sock_load_diag_module(int family, int protocol)
+{
+ if (!protocol) {
+ if (!sock_is_registered(family))
+ return -ENOENT;
+
+ return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family);
+ }
+
+#ifdef CONFIG_INET
+ if (family == AF_INET &&
+ !rcu_access_pointer(inet_protos[protocol]))
+ return -ENOENT;
+#endif
+
+ return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family, protocol);
+}
+EXPORT_SYMBOL(sock_load_diag_module);
+
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(proto_list_mutex)
@@ -3369,7 +3454,7 @@ static const struct file_operations proto_seq_fops = {
static __net_init int proto_init_net(struct net *net)
{
- if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
+ if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 146b50e30659..c37b5be7c5e4 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -220,8 +220,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
if (sock_diag_handlers[req->sdiag_family] == NULL)
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, req->sdiag_family);
+ sock_load_diag_module(req->sdiag_family, 0);
mutex_lock(&sock_diag_table_mutex);
hndl = sock_diag_handlers[req->sdiag_family];
@@ -247,8 +246,7 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
case TCPDIAG_GETSOCK:
case DCCPDIAG_GETSOCK:
if (inet_rcv_compat == NULL)
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ sock_load_diag_module(AF_INET, 0);
mutex_lock(&sock_diag_table_mutex);
if (inet_rcv_compat != NULL)
@@ -281,14 +279,12 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET6);
+ sock_load_diag_module(AF_INET6, 0);
break;
}
return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f2d0462611c3..b1a2c5e38530 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -15,7 +15,6 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
-#include <linux/kmemleak.h>
#include <net/ip.h>
#include <net/sock.h>
@@ -32,6 +31,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
+int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
+EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
+
#ifdef CONFIG_RPS
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +515,15 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
+ {
+ .procname = "fb_tunnels_only_for_init_net",
+ .data = &sysctl_fb_tunnels_only_for_init_net,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
diff --git a/net/core/utils.c b/net/core/utils.c
index 93066bd0305a..d47863b07a60 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -403,6 +403,29 @@ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af,
}
EXPORT_SYMBOL(inet_pton_with_scope);
+bool inet_addr_is_any(struct sockaddr *addr)
+{
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr;
+ const struct sockaddr_in6 in6_any =
+ { .sin6_addr = IN6ADDR_ANY_INIT };
+
+ if (!memcmp(in6->sin6_addr.s6_addr,
+ in6_any.sin6_addr.s6_addr, 16))
+ return true;
+ } else if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *in = (struct sockaddr_in *)addr;
+
+ if (in->sin_addr.s_addr == htonl(INADDR_ANY))
+ return true;
+ } else {
+ pr_warn("unexpected address family %u\n", addr->sa_family);
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(inet_addr_is_any);
+
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
__be32 from, __be32 to, bool pseudohdr)
{
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e65fcb45c3f6..b08feb219b44 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -614,6 +614,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+ ireq->ir_mark = inet_request_mark(sk, skb);
ireq->ireq_family = AF_INET;
ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5df7857fc0f3..6344f1b18a6a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -351,6 +351,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
ireq->ireq_family = AF_INET6;
+ ireq->ir_mark = inet_request_mark(sk, skb);
if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 15bdc002d90c..84cd4e3fd01b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -794,6 +794,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (skb == NULL)
goto out_release;
+ if (sk->sk_state == DCCP_CLOSED) {
+ rc = -ENOTCONN;
+ goto out_discard;
+ }
+
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc != 0)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 791aff68af88..32751602767f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
}
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
{
struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- *uaddr_len = sizeof(struct sockaddr_dn);
-
lock_sock(sk);
if (peer) {
@@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_dn);
}
@@ -2385,7 +2383,7 @@ static int __init decnet_init(void)
dev_add_pack(&dn_dix_packet_type);
register_netdevice_notifier(&dn_dev_notifier);
- proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops);
+ proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops);
dn_register_sysctl();
out:
return rc;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c9f5e1ebb9c8..c03b046478c3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1424,7 +1424,7 @@ void __init dn_dev_init(void)
rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
NULL, dn_nl_dump_ifaddr, 0);
- proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
+ proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops);
#ifdef CONFIG_SYSCTL
{
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 6e37d9e6345e..13156165afa3 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -608,7 +608,7 @@ static const struct file_operations dn_neigh_seq_fops = {
void __init dn_neigh_init(void)
{
neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
- proc_create("decnet_neigh", S_IRUGO, init_net.proc_net,
+ proc_create("decnet_neigh", 0444, init_net.proc_net,
&dn_neigh_seq_fops);
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ef20b8e31669..eca0cc6b761f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1918,7 +1918,7 @@ void __init dn_route_init(void)
dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
- proc_create("decnet_cache", S_IRUGO, init_net.proc_net,
+ proc_create("decnet_cache", 0444, init_net.proc_net,
&dn_rt_cache_seq_fops);
#ifdef CONFIG_DECNET_ROUTER
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index e1d4d898a007..8396705deffc 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -38,7 +38,7 @@ MODULE_AUTHOR("Wang Lei");
MODULE_LICENSE("GPL");
unsigned int dns_resolver_debug;
-module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug, dns_resolver_debug, uint, 0644);
MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
const struct cred *dns_resolver_cache;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6a9d0f50fbee..e63c554e0623 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
@@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ unsigned int type;
+
+ if (skb_headroom(skb) < ETH_HLEN)
+ return false;
+
+ __skb_push(skb, ETH_HLEN);
+
+ type = ptp_classify_raw(skb);
+
+ __skb_pull(skb, ETH_HLEN);
+
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ if (likely(ds->ops->port_rxtstamp))
+ return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+ return false;
+}
+
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *unused)
{
@@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
s->rx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ if (dsa_skb_defer_rx_timestamp(p, skb))
+ return 0;
+
netif_receive_skb(skb);
return 0;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 70de7895e5b8..053731473c99 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -126,6 +126,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
struct dsa_switch *ds;
+ struct dsa_port *slave_port;
if (device < 0 || device >= DSA_MAX_SWITCHES)
return NULL;
@@ -137,7 +138,12 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
if (port < 0 || port >= ds->num_ports)
return NULL;
- return ds->ports[port].slave;
+ slave_port = &ds->ports[port];
+
+ if (unlikely(slave_port->type != DSA_PORT_TYPE_USER))
+ return NULL;
+
+ return slave_port->slave;
}
/* port.c */
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index cb54b81d0bd9..42a7b85b84e1 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -194,7 +194,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
ds->ports[i].dn = cd->port_dn[i];
ds->ports[i].cpu_dp = dst->cpu_dp;
- if (dsa_is_user_port(ds, i))
+ if (!dsa_is_user_port(ds, i))
continue;
ret = dsa_slave_create(&ds->ports[i]);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 00589147f042..90e6df0351eb 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
count += ops->get_sset_count(dev, sset);
if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, cpu_dp->index);
return count;
}
@@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
* constructed earlier
*/
ds->ops->get_strings(ds, port, ndata);
- count = ds->ops->get_sset_count(ds);
+ count = ds->ops->get_sset_count(ds, port);
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f52307296de4..18561af7a8f1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
#include "dsa_priv.h"
@@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+ int port = p->dp->index;
+
+ /* Pass through to switch driver if it supports timestamping */
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ if (ds->ops->port_hwtstamp_get)
+ return ds->ops->port_hwtstamp_get(ds, port, ifr);
+ break;
+ case SIOCSHWTSTAMP:
+ if (ds->ops->port_hwtstamp_set)
+ return ds->ops->port_hwtstamp_set(ds, port, ifr);
+ break;
+ }
+
if (!dev->phydev)
return -ENODEV;
@@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
return NETDEV_TX_OK;
}
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ struct sk_buff *clone;
+ unsigned int type;
+
+ type = ptp_classify_raw(skb);
+ if (type == PTP_CLASS_NONE)
+ return;
+
+ if (!ds->ops->port_txtstamp)
+ return;
+
+ clone = skb_clone_sk(skb);
+ if (!clone)
+ return;
+
+ if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+ return;
+
+ kfree_skb(clone);
+}
+
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
s->tx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ /* Identify PTP protocol packets, clone them, and pass them to the
+ * switch driver
+ */
+ dsa_skb_tx_timestamp(p, skb);
+
/* Transmit function may have to reallocate the original SKB,
* in which case it must have freed it. Only free it here on error.
*/
@@ -559,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
count = 4;
if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, dp->index);
return count;
}
@@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
+static int dsa_slave_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *ts)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+
+ if (!ds->ops->get_ts_info)
+ return -EOPNOTSUPP;
+
+ return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
@@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
+ .get_ts_info = dsa_slave_get_ts_info,
};
/* legacy way, bypassing the bridge *****************************************/
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index d8de3bcfb103..b8d95cb71c25 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -17,37 +17,19 @@ typedef unsigned __bitwise lowpan_rx_result;
#define LOWPAN_DISPATCH_FRAG1 0xc0
#define LOWPAN_DISPATCH_FRAGN 0xe0
-struct lowpan_create_arg {
+struct frag_lowpan_compare_key {
u16 tag;
u16 d_size;
- const struct ieee802154_addr *src;
- const struct ieee802154_addr *dst;
+ const struct ieee802154_addr src;
+ const struct ieee802154_addr dst;
};
-/* Equivalent of ipv4 struct ip
+/* Equivalent of ipv4 struct ipq
*/
struct lowpan_frag_queue {
struct inet_frag_queue q;
-
- u16 tag;
- u16 d_size;
- struct ieee802154_addr saddr;
- struct ieee802154_addr daddr;
};
-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
-{
- switch (a->mode) {
- case IEEE802154_ADDR_LONG:
- return (((__force u64)a->extended_addr) >> 32) ^
- (((__force u64)a->extended_addr) & 0xffffffff);
- case IEEE802154_ADDR_SHORT:
- return (__force u32)(a->short_addr + (a->pan_id << 16));
- default:
- return 0;
- }
-}
-
int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
void lowpan_net_frag_exit(void);
int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 974765b7d92a..275449b0d633 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
@@ -206,9 +207,13 @@ static inline void lowpan_netlink_fini(void)
static int lowpan_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct wpan_dev *wpan_dev;
- if (wdev->type != ARPHRD_IEEE802154)
+ if (ndev->type != ARPHRD_IEEE802154)
+ return NOTIFY_DONE;
+ wpan_dev = ndev->ieee802154_ptr;
+ if (!wpan_dev)
return NOTIFY_DONE;
switch (event) {
@@ -217,8 +222,8 @@ static int lowpan_device_event(struct notifier_block *unused,
* also delete possible lowpan interfaces which belongs
* to the wpan interface.
*/
- if (wdev->ieee802154_ptr->lowpan_dev)
- lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
+ if (wpan_dev->lowpan_dev)
+ lowpan_dellink(wpan_dev->lowpan_dev, NULL);
break;
default:
return NOTIFY_DONE;
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 85bf86ad6b18..1790b65944b3 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
struct sk_buff *prev, struct net_device *ldev);
-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
- const struct ieee802154_addr *saddr,
- const struct ieee802154_addr *daddr)
-{
- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
- return jhash_3words(ieee802154_addr_hash(saddr),
- ieee802154_addr_hash(daddr),
- (__force u32)(tag + (d_size << 16)),
- lowpan_frags.rnd);
-}
-
-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
-{
- const struct lowpan_frag_queue *fq;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
-}
-
-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct lowpan_frag_queue *fq;
- const struct lowpan_create_arg *arg = a;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return fq->tag == arg->tag && fq->d_size == arg->d_size &&
- ieee802154_addr_equal(&fq->saddr, arg->src) &&
- ieee802154_addr_equal(&fq->daddr, arg->dst);
-}
-
static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
{
- const struct lowpan_create_arg *arg = a;
+ const struct frag_lowpan_compare_key *key = a;
struct lowpan_frag_queue *fq;
fq = container_of(q, struct lowpan_frag_queue, q);
- fq->tag = arg->tag;
- fq->d_size = arg->d_size;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
+ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
+ memcpy(&q->key, key, sizeof(*key));
}
static void lowpan_frag_expire(struct timer_list *t)
@@ -94,10 +62,10 @@ static void lowpan_frag_expire(struct timer_list *t)
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
}
static inline struct lowpan_frag_queue *
@@ -105,25 +73,20 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
const struct ieee802154_addr *src,
const struct ieee802154_addr *dst)
{
- struct inet_frag_queue *q;
- struct lowpan_create_arg arg;
- unsigned int hash;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ struct frag_lowpan_compare_key key = {
+ .tag = cb->d_tag,
+ .d_size = cb->d_size,
+ .src = *src,
+ .dst = *dst,
+ };
+ struct inet_frag_queue *q;
- arg.tag = cb->d_tag;
- arg.d_size = cb->d_size;
- arg.src = src;
- arg.dst = dst;
-
- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
-
- q = inet_frag_find(&ieee802154_lowpan->frags,
- &lowpan_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct lowpan_frag_queue, q);
}
@@ -230,7 +193,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
struct sk_buff *fp, *head = fq->q.fragments;
int sum_truesize;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
/* Make the one we just received the head. */
if (prev) {
@@ -438,7 +401,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
ret = lowpan_frag_queue(fq, skb, frag_type);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -448,24 +411,22 @@ err:
}
#ifdef CONFIG_SYSCTL
-static int zero;
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
{
@@ -581,14 +542,20 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+ ieee802154_lowpan->frags.f = &lowpan_frags;
- inet_frags_init_net(&ieee802154_lowpan->frags);
-
- return lowpan_frags_ns_sysctl_register(net);
+ res = inet_frags_init_net(&ieee802154_lowpan->frags);
+ if (res < 0)
+ return res;
+ res = lowpan_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
+ return res;
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
@@ -597,7 +564,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);
lowpan_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
}
static struct pernet_operations lowpan_frags_ops = {
@@ -605,32 +572,63 @@ static struct pernet_operations lowpan_frags_ops = {
.exit = lowpan_frags_exit_net,
};
-int __init lowpan_net_frag_init(void)
+static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
{
- int ret;
+ return jhash2(data,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
- ret = lowpan_frags_sysctl_register();
- if (ret)
- return ret;
+static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
- ret = register_pernet_subsys(&lowpan_frags_ops);
- if (ret)
- goto err_pernet;
+ return jhash2((const u32 *)&fq->key,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_lowpan_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params lowpan_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = lowpan_key_hashfn,
+ .obj_hashfn = lowpan_obj_hashfn,
+ .obj_cmpfn = lowpan_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
+int __init lowpan_net_frag_init(void)
+{
+ int ret;
- lowpan_frags.hashfn = lowpan_hashfn;
lowpan_frags.constructor = lowpan_frag_init;
lowpan_frags.destructor = NULL;
lowpan_frags.qsize = sizeof(struct frag_queue);
- lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire;
lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+ lowpan_frags.rhash_params = lowpan_rhash_params;
ret = inet_frags_init(&lowpan_frags);
if (ret)
- goto err_pernet;
+ goto out;
+ ret = lowpan_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+
+ ret = register_pernet_subsys(&lowpan_frags_ops);
+ if (ret)
+ goto err_pernet;
+out:
return ret;
err_pernet:
lowpan_frags_sysctl_unregister();
+err_sysctl:
+ inet_frags_fini(&lowpan_frags);
return ret;
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7e8c..80dad301361d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.
+config IP_MROUTE_COMMON
+ bool
+ depends on IP_MROUTE || IPV6_MROUTE
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
+ select IP_MROUTE_COMMON
help
This is used if you want your machine to act as a router for IP
packets that have several destination addresses. It is needed on the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 47a0a6649a9d..a07b7dd06def 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e4329e161943..eaed0367e669 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct net *net = sock_net(sk);
- unsigned short snum;
- int chk_addr_ret;
- u32 tb_id = RT_TABLE_LOCAL;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
- err = sk->sk_prot->bind(sk, uaddr, addr_len);
- goto out;
+ return sk->sk_prot->bind(sk, uaddr, addr_len);
}
- err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
- goto out;
+ return -EINVAL;
+
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet_bind);
+
+int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
+ unsigned short snum;
+ int chk_addr_ret;
+ u32 tb_id = RT_TABLE_LOCAL;
+ int err;
if (addr->sin_family != AF_INET) {
/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
@@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
@@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- inet->inet_saddr = inet->inet_rcv_saddr = 0;
- err = -EADDRINUSE;
- goto out_release_sock;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ err = -EADDRINUSE;
+ goto out_release_sock;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+ if (err) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ goto out_release_sock;
+ }
}
if (inet->inet_rcv_saddr)
@@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
err = 0;
out_release_sock:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
out:
return err;
}
-EXPORT_SYMBOL(inet_bind);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
+ int err;
if (addr_len < sizeof(uaddr->sa_family))
return -EINVAL;
if (uaddr->sa_family == AF_UNSPEC)
return sk->sk_prot->disconnect(sk, flags);
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+ err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ if (err)
+ return err;
+ }
+
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->connect(sk, uaddr, addr_len);
@@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state != TCP_CLOSE)
goto out;
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+ err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
err = sk->sk_prot->connect(sk, uaddr, addr_len);
if (err < 0)
goto out;
@@ -723,7 +758,7 @@ EXPORT_SYMBOL(inet_accept);
* This does both peername and sockname.
*/
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -745,8 +780,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_addr.s_addr = addr;
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet_getname);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f28f06c91ead..bf6c2d4d4fdc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -437,7 +437,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
/*unsigned long now; */
struct net *net = dev_net(dev);
- rt = ip_route_output(net, sip, tip, 0, 0);
+ rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev));
if (IS_ERR(rt))
return 1;
if (rt->dst.dev != dev) {
@@ -1434,7 +1434,7 @@ static const struct file_operations arp_seq_fops = {
static int __net_init arp_net_init(struct net *net)
{
- if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops))
+ if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops))
return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 296d0b956bfe..97689012b357 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -654,7 +654,7 @@ static void esp_input_restore_header(struct sk_buff *skb)
static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+ struct ip_esp_hdr *esph;
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index da5635fc52c2..7cf755ef9efb 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -138,6 +138,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+ else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+ esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a62ad4..737d11bc8838 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tos && (r->tos != fl4->flowi4_tos))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ return 0;
+
return 1;
}
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
#endif
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect++;
+
rule4->src_len = frh->src_len;
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+
+ if (net->ipv4.fib_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect--;
errout:
return err;
}
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
goto fail;
net->ipv4.rules_ops = ops;
net->ipv4.fib_has_custom_rules = false;
+ net->ipv4.fib_rules_require_fldissect = 0;
return 0;
fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 7d36a950d961..c27122f01b87 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -1746,18 +1746,20 @@ void fib_select_multipath(struct fib_result *res, int hash)
bool first = false;
for_nexthops(fi) {
+ if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (!fib_good_nh(nh))
+ continue;
+ if (!first) {
+ res->nh_sel = nhsel;
+ first = true;
+ }
+ }
+
if (hash > atomic_read(&nh->nh_upper_bound))
continue;
- if (!net->ipv4.sysctl_fib_multipath_use_neigh ||
- fib_good_nh(nh)) {
- res->nh_sel = nhsel;
- return;
- }
- if (!first) {
- res->nh_sel = nhsel;
- first = true;
- }
+ res->nh_sel = nhsel;
+ return;
} endfor_nexthops(fi);
}
#endif
@@ -1765,14 +1767,12 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
- bool oif_check;
-
- oif_check = (fl4->flowi4_oif == 0 ||
- fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+ if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+ goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1 && oif_check) {
- int h = fib_multipath_hash(res->fi, fl4, skb);
+ if (res->fi->fib_nhs > 1) {
+ int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
@@ -1780,10 +1780,10 @@ void fib_select_path(struct net *net, struct fib_result *res,
#endif
if (!res->prefixlen &&
res->table->tb_num_default > 1 &&
- res->type == RTN_UNICAST && oif_check)
+ res->type == RTN_UNICAST)
fib_select_default(fl4, res);
+check_saddr:
if (!fl4->saddr)
fl4->saddr = FIB_RES_PREFSRC(net, *res);
}
-EXPORT_SYMBOL_GPL(fib_select_path);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5530cd6fdbc7..3dcffd3ce98c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,6 +50,7 @@
#define VERSION "0.409"
+#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
*/
static const int sync_pages = 128;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
static inline struct tnode *tn_info(struct key_vector *kv)
{
@@ -1064,6 +1065,9 @@ noleaf:
return -ENOMEM;
}
+/* fib notifier for ADD is sent before calling fib_insert_alias with
+ * the expectation that the only possible failure ENOMEM
+ */
static int fib_insert_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *new,
struct fib_alias *fa, t_key key)
@@ -1215,8 +1219,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
- key, plen, new_fa, extack);
+ err = call_fib_entry_notifiers(net,
+ FIB_EVENT_ENTRY_REPLACE,
+ key, plen, new_fa,
+ extack);
+ if (err)
+ goto out_free_new_fa;
+
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1262,21 +1271,32 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
+ err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
+ if (err)
+ goto out_free_new_fa;
+
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_free_new_fa;
+ goto out_fib_notif;
if (!plen)
tb->tb_num_default++;
rt_cache_flush(cfg->fc_nlinfo.nl_net);
- call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
&cfg->fc_nlinfo, nlflags);
succeeded:
return 0;
+out_fib_notif:
+ /* notifier was sent that entry would be added to trie, but
+ * the add failed and need to recover. Only failure for
+ * fib_insert_alias is ENOMEM.
+ */
+ NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
+ call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
+ plen, new_fa, NULL);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -2721,14 +2741,14 @@ static const struct file_operations fib_route_fops = {
int __net_init fib_proc_init(struct net *net)
{
- if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
+ if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops))
goto out1;
- if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
+ if (!proc_create("fib_triestat", 0444, net->proc_net,
&fib_triestat_fops))
goto out2;
- if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
+ if (!proc_create("route", 0444, net->proc_net, &fib_route_fops))
goto out3;
return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f2402581fef1..b26a81a7de42 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2993,10 +2993,10 @@ static int __net_init igmp_net_init(struct net *net)
struct proc_dir_entry *pde;
int err;
- pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops);
+ pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops);
if (!pde)
goto out_igmp;
- pde = proc_create("mcfilter", S_IRUGO, net->proc_net,
+ pde = proc_create("mcfilter", 0444, net->proc_net,
&igmp_mcf_seq_fops);
if (!pde)
goto out_mcfilter;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a383f299ce24..4e5bc4b2f14e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -53,8 +53,7 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
if (!inet_diag_table[proto])
- request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET, proto);
+ sock_load_diag_module(AF_INET, proto);
mutex_lock(&inet_diag_table_mutex);
if (!inet_diag_table[proto])
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 26a3d0315728..c9e35b81d093 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,12 +25,6 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
-#define INETFRAGS_EVICT_BUCKETS 128
-#define INETFRAGS_EVICT_MAX 512
-
-/* don't rebuild inetfrag table with new secret more often than this */
-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
-
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -52,154 +46,8 @@ const u8 ip_frag_ecn_table[16] = {
};
EXPORT_SYMBOL(ip_frag_ecn_table);
-static unsigned int
-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
-{
- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
-}
-
-static bool inet_frag_may_rebuild(struct inet_frags *f)
-{
- return time_after(jiffies,
- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
-}
-
-static void inet_frag_secret_rebuild(struct inet_frags *f)
-{
- int i;
-
- write_seqlock_bh(&f->rnd_seqlock);
-
- if (!inet_frag_may_rebuild(f))
- goto out;
-
- get_random_bytes(&f->rnd, sizeof(u32));
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- struct hlist_node *n;
-
- hb = &f->hash[i];
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(q, n, &hb->chain, list) {
- unsigned int hval = inet_frag_hashfn(f, q);
-
- if (hval != i) {
- struct inet_frag_bucket *hb_dest;
-
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hb_dest = &f->hash[hval];
-
- /* This is the only place where we take
- * another chain_lock while already holding
- * one. As this will not run concurrently,
- * we cannot deadlock on hb_dest lock below, if its
- * already locked it will be released soon since
- * other caller cannot be waiting for hb lock
- * that we've taken above.
- */
- spin_lock_nested(&hb_dest->chain_lock,
- SINGLE_DEPTH_NESTING);
- hlist_add_head(&q->list, &hb_dest->chain);
- spin_unlock(&hb_dest->chain_lock);
- }
- }
- spin_unlock(&hb->chain_lock);
- }
-
- f->rebuild = false;
- f->last_rebuild_jiffies = jiffies;
-out:
- write_sequnlock_bh(&f->rnd_seqlock);
-}
-
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
- return q->net->low_thresh == 0 ||
- frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
- struct inet_frag_queue *fq;
- struct hlist_node *n;
- unsigned int evicted = 0;
- HLIST_HEAD(expired);
-
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
- if (!inet_fragq_should_evict(fq))
- continue;
-
- if (!del_timer(&fq->timer))
- continue;
-
- hlist_add_head(&fq->list_evictor, &expired);
- ++evicted;
- }
-
- spin_unlock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire(&fq->timer);
-
- return evicted;
-}
-
-static void inet_frag_worker(struct work_struct *work)
-{
- unsigned int budget = INETFRAGS_EVICT_BUCKETS;
- unsigned int i, evicted = 0;
- struct inet_frags *f;
-
- f = container_of(work, struct inet_frags, frags_work);
-
- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
-
- local_bh_disable();
-
- for (i = READ_ONCE(f->next_bucket); budget; --budget) {
- evicted += inet_evict_bucket(f, &f->hash[i]);
- i = (i + 1) & (INETFRAGS_HASHSZ - 1);
- if (evicted > INETFRAGS_EVICT_MAX)
- break;
- }
-
- f->next_bucket = i;
-
- local_bh_enable();
-
- if (f->rebuild && inet_frag_may_rebuild(f))
- inet_frag_secret_rebuild(f);
-}
-
-static void inet_frag_schedule_worker(struct inet_frags *f)
-{
- if (unlikely(!work_pending(&f->frags_work)))
- schedule_work(&f->frags_work);
-}
-
int inet_frags_init(struct inet_frags *f)
{
- int i;
-
- INIT_WORK(&f->frags_work, inet_frag_worker);
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb = &f->hash[i];
-
- spin_lock_init(&hb->chain_lock);
- INIT_HLIST_HEAD(&hb->chain);
- }
-
- seqlock_init(&f->rnd_seqlock);
- f->last_rebuild_jiffies = 0;
f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
NULL);
if (!f->frags_cachep)
@@ -211,83 +59,75 @@ EXPORT_SYMBOL(inet_frags_init);
void inet_frags_fini(struct inet_frags *f)
{
- cancel_work_sync(&f->frags_work);
+ /* We must wait that all inet_frag_destroy_rcu() have completed. */
+ rcu_barrier();
+
kmem_cache_destroy(f->frags_cachep);
+ f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
-{
- unsigned int seq;
- int i;
-
- nf->low_thresh = 0;
-
-evict_again:
- local_bh_disable();
- seq = read_seqbegin(&f->rnd_seqlock);
-
- for (i = 0; i < INETFRAGS_HASHSZ ; i++)
- inet_evict_bucket(f, &f->hash[i]);
-
- local_bh_enable();
- cond_resched();
-
- if (read_seqretry(&f->rnd_seqlock, seq) ||
- sum_frag_mem_limit(nf))
- goto evict_again;
-}
-EXPORT_SYMBOL(inet_frags_exit_net);
-
-static struct inet_frag_bucket *
-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
-__acquires(hb->chain_lock)
+static void inet_frags_free_cb(void *ptr, void *arg)
{
- struct inet_frag_bucket *hb;
- unsigned int seq, hash;
+ struct inet_frag_queue *fq = ptr;
- restart:
- seq = read_seqbegin(&f->rnd_seqlock);
-
- hash = inet_frag_hashfn(f, fq);
- hb = &f->hash[hash];
+ /* If we can not cancel the timer, it means this frag_queue
+ * is already disappearing, we have nothing to do.
+ * Otherwise, we own a refcount until the end of this function.
+ */
+ if (!del_timer(&fq->timer))
+ return;
- spin_lock(&hb->chain_lock);
- if (read_seqretry(&f->rnd_seqlock, seq)) {
- spin_unlock(&hb->chain_lock);
- goto restart;
+ spin_lock_bh(&fq->lock);
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+ fq->flags |= INET_FRAG_COMPLETE;
+ refcount_dec(&fq->refcnt);
}
+ spin_unlock_bh(&fq->lock);
- return hb;
+ inet_frag_put(fq);
}
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frags_exit_net(struct netns_frags *nf)
{
- struct inet_frag_bucket *hb;
+ nf->low_thresh = 0; /* prevent creation of new frags */
- hb = get_frag_bucket_locked(fq, f);
- hlist_del(&fq->list);
- fq->flags |= INET_FRAG_COMPLETE;
- spin_unlock(&hb->chain_lock);
+ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
+EXPORT_SYMBOL(inet_frags_exit_net);
-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frag_kill(struct inet_frag_queue *fq)
{
if (del_timer(&fq->timer))
refcount_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
- fq_unlink(fq, f);
+ struct netns_frags *nf = fq->net;
+
+ fq->flags |= INET_FRAG_COMPLETE;
+ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
refcount_dec(&fq->refcnt);
}
}
EXPORT_SYMBOL(inet_frag_kill);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+static void inet_frag_destroy_rcu(struct rcu_head *head)
+{
+ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
+ rcu);
+ struct inet_frags *f = q->net->f;
+
+ if (f->destructor)
+ f->destructor(q);
+ kmem_cache_free(f->frags_cachep, q);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
struct netns_frags *nf;
unsigned int sum, sum_truesize = 0;
+ struct inet_frags *f;
WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
@@ -295,6 +135,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
/* Release all fragment data. */
fp = q->fragments;
nf = q->net;
+ f = nf->f;
while (fp) {
struct sk_buff *xp = fp->next;
@@ -304,59 +145,20 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
}
sum = sum_truesize + f->qsize;
- if (f->destructor)
- f->destructor(q);
- kmem_cache_free(f->frags_cachep, q);
+ call_rcu(&q->rcu, inet_frag_destroy_rcu);
sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
- struct inet_frag_queue *qp_in,
- struct inet_frags *f,
- void *arg)
-{
- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
- struct inet_frag_queue *qp;
-
-#ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could have been created on other cpu before
- * we acquired hash bucket lock.
- */
- hlist_for_each_entry(qp, &hb->chain, list) {
- if (qp->net == nf && f->match(qp, arg)) {
- refcount_inc(&qp->refcnt);
- spin_unlock(&hb->chain_lock);
- qp_in->flags |= INET_FRAG_COMPLETE;
- inet_frag_put(qp_in, f);
- return qp;
- }
- }
-#endif
- qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + nf->timeout))
- refcount_inc(&qp->refcnt);
-
- refcount_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &hb->chain);
-
- spin_unlock(&hb->chain_lock);
-
- return qp;
-}
-
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
struct inet_frags *f,
void *arg)
{
struct inet_frag_queue *q;
- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
- inet_frag_schedule_worker(f);
+ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
return NULL;
- }
q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (!q)
@@ -368,70 +170,51 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
- refcount_set(&q->refcnt, 1);
+ refcount_set(&q->refcnt, 3);
return q;
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- struct inet_frags *f,
void *arg)
{
+ struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
+ int err;
q = inet_frag_alloc(nf, f, arg);
if (!q)
return NULL;
- return inet_frag_intern(nf, q, f, arg);
+ mod_timer(&q->timer, jiffies + nf->timeout);
+
+ err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
+ f->rhash_params);
+ if (err < 0) {
+ q->flags |= INET_FRAG_COMPLETE;
+ inet_frag_kill(q);
+ inet_frag_destroy(q);
+ return NULL;
+ }
+ return q;
}
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key,
- unsigned int hash)
+/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- int depth = 0;
-
- if (frag_mem_limit(nf) > nf->low_thresh)
- inet_frag_schedule_worker(f);
-
- hash &= (INETFRAGS_HASHSZ - 1);
- hb = &f->hash[hash];
-
- spin_lock(&hb->chain_lock);
- hlist_for_each_entry(q, &hb->chain, list) {
- if (q->net == nf && f->match(q, key)) {
- refcount_inc(&q->refcnt);
- spin_unlock(&hb->chain_lock);
- return q;
- }
- depth++;
- }
- spin_unlock(&hb->chain_lock);
+ struct inet_frag_queue *fq;
- if (depth <= INETFRAGS_MAXDEPTH)
- return inet_frag_create(nf, f, key);
+ rcu_read_lock();
- if (inet_frag_may_rebuild(f)) {
- if (!f->rebuild)
- f->rebuild = true;
- inet_frag_schedule_worker(f);
+ fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (fq) {
+ if (!refcount_inc_not_zero(&fq->refcnt))
+ fq = NULL;
+ rcu_read_unlock();
+ return fq;
}
+ rcu_read_unlock();
- return ERR_PTR(-ENOBUFS);
+ return inet_frag_create(nf, key);
}
EXPORT_SYMBOL(inet_frag_find);
-
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix)
-{
- static const char msg[] = "inet_frag_find: Fragment hash bucket"
- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
- ". Dropping fragment.\n";
-
- if (PTR_ERR(q) == -ENOBUFS)
- net_dbg_ratelimited("%s%s", prefix, msg);
-}
-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c3ea4906d237..88c5069b5d20 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -178,6 +178,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
tw->tw_dport = inet->inet_dport;
tw->tw_family = sk->sk_family;
tw->tw_reuse = sk->sk_reuse;
+ tw->tw_reuseport = sk->sk_reuseport;
tw->tw_hash = sk->sk_hash;
tw->tw_ipv6only = 0;
tw->tw_transparent = inet->transparent;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 914d56928578..d757b9642d0d 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -6,6 +6,7 @@
* Authors: Andrey V. Savochkin <saw@msu.ru>
*/
+#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -51,7 +52,7 @@
* daddr: unchangeable
*/
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
void inet_peer_base_init(struct inet_peer_base *bp)
{
@@ -210,6 +211,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
if (p) {
p->daddr = *daddr;
+ p->dtime = (__u32)jiffies;
refcount_set(&p->refcnt, 2);
atomic_set(&p->rid, 0);
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 2dd21c3281a1..b54b948b0596 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -55,7 +55,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbf1b94942c0..8e9528ebaa8e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,27 +57,13 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
-struct ipfrag_skb_cb
-{
- struct inet_skb_parm h;
- int offset;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
- u32 user;
- __be32 saddr;
- __be32 daddr;
- __be16 id;
- u8 protocol;
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -89,49 +75,9 @@ static u8 ip4_frag_ecn(u8 tos)
static struct inet_frags ip4_frags;
-int ip_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv4.frags);
-}
-
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct net_device *dev);
-struct ip4_create_arg {
- struct iphdr *iph;
- u32 user;
- int vif;
-};
-
-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-{
- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
- return jhash_3words((__force u32)id << 16 | prot,
- (__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd);
-}
-
-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
-{
- const struct ipq *ipq;
-
- ipq = container_of(q, struct ipq, q);
- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
-}
-
-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct ipq *qp;
- const struct ip4_create_arg *arg = a;
-
- qp = container_of(q, struct ipq, q);
- return qp->id == arg->iph->id &&
- qp->saddr == arg->iph->saddr &&
- qp->daddr == arg->iph->daddr &&
- qp->protocol == arg->iph->protocol &&
- qp->user == arg->user &&
- qp->vif == arg->vif;
-}
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
@@ -140,17 +86,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
frags);
struct net *net = container_of(ipv4, struct net, ipv4);
- const struct ip4_create_arg *arg = a;
+ const struct frag_v4_compare_key *key = a;
- qp->protocol = arg->iph->protocol;
- qp->id = arg->iph->id;
- qp->ecn = ip4_frag_ecn(arg->iph->tos);
- qp->saddr = arg->iph->saddr;
- qp->daddr = arg->iph->daddr;
- qp->vif = arg->vif;
- qp->user = arg->user;
+ q->key.v4 = *key;
+ qp->ecn = 0;
qp->peer = q->net->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
NULL;
}
@@ -168,7 +109,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)
static void ipq_put(struct ipq *ipq)
{
- inet_frag_put(&ipq->q, &ip4_frags);
+ inet_frag_put(&ipq->q);
}
/* Kill ipq entry. It is not destroyed immediately,
@@ -176,7 +117,7 @@ static void ipq_put(struct ipq *ipq)
*/
static void ipq_kill(struct ipq *ipq)
{
- inet_frag_kill(&ipq->q, &ip4_frags);
+ inet_frag_kill(&ipq->q);
}
static bool frag_expire_skip_icmp(u32 user)
@@ -194,8 +135,11 @@ static bool frag_expire_skip_icmp(u32 user)
static void ip_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
- struct ipq *qp;
+ const struct iphdr *iph;
+ struct sk_buff *head;
struct net *net;
+ struct ipq *qp;
+ int err;
qp = container_of(frag, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
@@ -209,46 +153,38 @@ static void ip_expire(struct timer_list *t)
ipq_kill(qp);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
- if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *clone, *head = qp->q.fragments;
- const struct iphdr *iph;
- int err;
+ head = qp->q.fragments;
- __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
- goto out;
+ if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
- head->dev = dev_get_by_index_rcu(net, qp->iif);
- if (!head->dev)
- goto out;
+ head->dev = dev_get_by_index_rcu(net, qp->iif);
+ if (!head->dev)
+ goto out;
- /* skb has no dst, perform route lookup again */
- iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ /* skb has no dst, perform route lookup again */
+ iph = ip_hdr(head);
+ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
- if (err)
- goto out;
+ if (err)
+ goto out;
+
+ /* Only an end host needs to send an ICMP
+ * "Fragment Reassembly Timeout" message, per RFC792.
+ */
+ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;
+
+ skb_get(head);
+ spin_unlock(&qp->q.lock);
+ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
- /* Only an end host needs to send an ICMP
- * "Fragment Reassembly Timeout" message, per RFC792.
- */
- if (frag_expire_skip_icmp(qp->user) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out;
-
- clone = skb_clone(head, GFP_ATOMIC);
-
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
- if (clone) {
- spin_unlock(&qp->q.lock);
- icmp_send(clone, ICMP_TIME_EXCEEDED,
- ICMP_EXC_FRAGTIME, 0);
- consume_skb(clone);
- goto out_rcu_unlock;
- }
- }
out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
@@ -262,21 +198,20 @@ out_rcu_unlock:
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
u32 user, int vif)
{
+ struct frag_v4_compare_key key = {
+ .saddr = iph->saddr,
+ .daddr = iph->daddr,
+ .user = user,
+ .vif = vif,
+ .id = iph->id,
+ .protocol = iph->protocol,
+ };
struct inet_frag_queue *q;
- struct ip4_create_arg arg;
- unsigned int hash;
-
- arg.iph = iph;
- arg.user = user;
- arg.vif = vif;
- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-
- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv4.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct ipq, q);
}
@@ -410,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* this fragment, right?
*/
prev = qp->q.fragments_tail;
- if (!prev || FRAG_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = qp->q.fragments; next != NULL; next = next->next) {
- if (FRAG_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -427,7 +362,7 @@ found:
* any overlaps are eliminated.
*/
if (prev) {
- int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+ int i = (prev->ip_defrag_offset + prev->len) - offset;
if (i > 0) {
offset += i;
@@ -444,8 +379,8 @@ found:
err = -ENOMEM;
- while (next && FRAG_CB(next)->offset < end) {
- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+ while (next && next->ip_defrag_offset < end) {
+ int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
if (i < next->len) {
/* Eat head of the next overlapped fragment
@@ -453,7 +388,7 @@ found:
*/
if (!pskb_pull(next, i))
goto err;
- FRAG_CB(next)->offset += i;
+ next->ip_defrag_offset += i;
qp->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
@@ -477,7 +412,13 @@ found:
}
}
- FRAG_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+ if (dev)
+ qp->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -488,11 +429,6 @@ found:
else
qp->q.fragments = skb;
- dev = skb->dev;
- if (dev) {
- qp->iif = dev->ifindex;
- skb->dev = NULL;
- }
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
@@ -568,7 +504,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
WARN_ON(!head);
- WARN_ON(FRAG_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
@@ -656,7 +592,7 @@ out_nomem:
err = -ENOMEM;
goto out_fail;
out_oversize:
- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
+ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
return err;
@@ -731,24 +667,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
EXPORT_SYMBOL(ip_check_defrag);
#ifdef CONFIG_SYSCTL
-static int zero;
+static int dist_min;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
.data = &init_net.ipv4.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv4.frags.high_thresh
},
{
@@ -764,7 +699,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .extra1 = &dist_min,
},
{ }
};
@@ -846,6 +781,8 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
+ int res;
+
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -870,16 +807,21 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.timeout = IP_FRAG_TIME;
net->ipv4.frags.max_dist = 64;
-
- inet_frags_init_net(&net->ipv4.frags);
-
- return ip4_frags_ns_ctl_register(net);
+ net->ipv4.frags.f = &ip4_frags;
+
+ res = inet_frags_init_net(&net->ipv4.frags);
+ if (res < 0)
+ return res;
+ res = ip4_frags_ns_ctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv4.frags);
+ return res;
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+ inet_frags_exit_net(&net->ipv4.frags);
}
static struct pernet_operations ip4_frags_ops = {
@@ -887,17 +829,49 @@ static struct pernet_operations ip4_frags_ops = {
.exit = ipv4_frags_exit_net,
};
+
+static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v4,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v4_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params ip4_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .key_offset = offsetof(struct inet_frag_queue, key),
+ .key_len = sizeof(struct frag_v4_compare_key),
+ .hashfn = ip4_key_hashfn,
+ .obj_hashfn = ip4_obj_hashfn,
+ .obj_cmpfn = ip4_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
void __init ipfrag_init(void)
{
- ip4_frags_ctl_register();
- register_pernet_subsys(&ip4_frags_ops);
- ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
ip4_frags.qsize = sizeof(struct ipq);
- ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
ip4_frags.frags_cache_name = ip_frag_cache_name;
+ ip4_frags.rhash_params = ip4_rhash_params;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
+ ip4_frags_ctl_register();
+ register_pernet_subsys(&ip4_frags_ops);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 45d97e9b2759..9c169bb2444d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -522,6 +522,7 @@ err_free_skb:
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
@@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
goto err_free_rt;
- flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = tun_info->key.tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -778,8 +781,14 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
tunnel->encap.type == TUNNEL_ENCAP_NONE) {
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ } else {
+ dev->features &= ~NETIF_F_GSO_SOFTWARE;
+ dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
}
dev->features |= NETIF_F_LLTX;
+ } else {
+ dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
+ dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
}
}
@@ -970,9 +979,6 @@ static void __gre_tunnel_init(struct net_device *dev)
t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
- dev->mtu = ETH_DATA_LEN - t_hlen - 4;
-
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -1290,8 +1296,6 @@ static int erspan_tunnel_init(struct net_device *dev)
erspan_hdr_len(tunnel->erspan_ver);
t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
- dev->mtu = ETH_DATA_LEN - t_hlen - 4;
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
@@ -1322,6 +1326,12 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
+bool is_gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 57fc13c6ab2b..7582713dd18f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
+ for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
struct sock *sk = ra->sk;
/* If socket is bound to an interface, only report
@@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
*/
if (sk && inet_sk(sk)->inet_num == protocol &&
(!sk->sk_bound_dev_if ||
- sk->sk_bound_dev_if == dev->ifindex) &&
- net_eq(sock_net(sk), net)) {
+ sk->sk_bound_dev_if == dev->ifindex)) {
if (ip_is_fragment(ip_hdr(skb))) {
if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
return true;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e8e675be60ec..4c11b810a447 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -248,7 +248,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
/* common case: seglen is <= mtu
*/
- if (skb_gso_validate_mtu(skb, mtu))
+ if (skb_gso_validate_network_len(skb, mtu))
return ip_finish_output2(net, sk, skb);
/* Slowpath - GSO segment length exceeds the egress MTU.
@@ -876,6 +876,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
+ unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
skb = skb_peek_tail(queue);
@@ -971,11 +972,10 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (refcount_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = sock_wmalloc(sk,
- alloclen + hh_len + 15, 1,
- sk->sk_allocation);
+ skb = alloc_skb(alloclen + hh_len + 15,
+ sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
}
@@ -1033,6 +1033,11 @@ alloc_new_skb:
/*
* Put the packet on the pending queue.
*/
+ if (!skb->destructor) {
+ skb->destructor = sock_wfree;
+ skb->sk = sk;
+ wmem_alloc_delta += skb->truesize;
+ }
__skb_queue_tail(queue, skb);
continue;
}
@@ -1079,12 +1084,14 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
+ wmem_alloc_delta += copy;
}
offset += copy;
length -= copy;
}
+ if (wmem_alloc_delta)
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return 0;
error_efault:
@@ -1092,6 +1099,7 @@ error_efault:
error:
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return err;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 9c41a0cef1a5..5ad2d8ed3a3f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
return -EINVAL;
- ipc->oif = src_info->ipi6_ifindex;
+ if (src_info->ipi6_ifindex)
+ ipc->oif = src_info->ipi6_ifindex;
ipc->addr = src_info->ipi6_addr.s6_addr32[3];
continue;
}
@@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
return -EINVAL;
info = (struct in_pktinfo *)CMSG_DATA(cmsg);
- ipc->oif = info->ipi_ifindex;
+ if (info->ipi_ifindex)
+ ipc->oif = info->ipi_ifindex;
ipc->addr = info->ipi_spec_dst.s_addr;
break;
}
@@ -320,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
return 0;
}
-
-/* Special input handler for packets caught by router alert option.
- They are selected only by protocol field, and then processed likely
- local ones; but only if someone wants them! Otherwise, router
- not running rsvpd will kill RSVP.
-
- It is user level problem, what it will make with them.
- I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
- but receiver should be enough clever f.e. to forward mtrace requests,
- sent to multicast group to reach destination designated router.
- */
-struct ip_ra_chain __rcu *ip_ra_chain;
-
-
static void ip_ra_destroy_rcu(struct rcu_head *head)
{
struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
@@ -347,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on,
{
struct ip_ra_chain *ra, *new_ra;
struct ip_ra_chain __rcu **rap;
+ struct net *net = sock_net(sk);
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
- for (rap = &ip_ra_chain;
- (ra = rtnl_dereference(*rap)) != NULL;
+ mutex_lock(&net->ipv4.ra_mutex);
+ for (rap = &net->ipv4.ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
+ mutex_unlock(&net->ipv4.ra_mutex);
kfree(new_ra);
return -EADDRINUSE;
}
/* dont let ip_call_ra_chain() use sk again */
ra->sk = NULL;
RCU_INIT_POINTER(*rap, ra->next);
+ mutex_unlock(&net->ipv4.ra_mutex);
if (ra->destructor)
ra->destructor(sk);
@@ -377,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return 0;
}
}
- if (!new_ra)
+ if (!new_ra) {
+ mutex_unlock(&net->ipv4.ra_mutex);
return -ENOBUFS;
+ }
new_ra->sk = sk;
new_ra->destructor = destructor;
RCU_INIT_POINTER(new_ra->next, ra);
rcu_assign_pointer(*rap, new_ra);
sock_hold(sk);
+ mutex_unlock(&net->ipv4.ra_mutex);
return 0;
}
@@ -584,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname)
case MCAST_LEAVE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_UNBLOCK_SOURCE:
- case IP_ROUTER_ALERT:
return true;
}
return false;
@@ -637,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
/* If optlen==0, it is equivalent to val == 0 */
+ if (optname == IP_ROUTER_ALERT)
+ return ip_ra_control(sk, val ? 1 : 0, NULL);
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
@@ -1147,9 +1144,6 @@ mc_msf_out:
goto e_inval;
inet->mc_all = val;
break;
- case IP_ROUTER_ALERT:
- err = ip_ra_control(sk, val ? 1 : 0, NULL);
- break;
case IP_FREEBIND:
if (optlen < 1)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d786a8441bce..6b0e362cc99b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -253,13 +253,14 @@ static struct net_device *__ip_tunnel_create(struct net *net,
struct net_device *dev;
char name[IFNAMSIZ];
- if (parms->name[0])
+ err = -E2BIG;
+ if (parms->name[0]) {
+ if (!dev_valid_name(parms->name))
+ goto failed;
strlcpy(name, parms->name, IFNAMSIZ);
- else {
- if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
- err = -E2BIG;
+ } else {
+ if (strlen(ops->kind) > (IFNAMSIZ - 3))
goto failed;
- }
strlcpy(name, ops->kind, IFNAMSIZ);
strncat(name, "%d", 2);
}
@@ -290,22 +291,6 @@ failed:
return ERR_PTR(err);
}
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
- fl4->flowi4_mark = mark;
-}
-
static int ip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -322,10 +307,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+ iph->saddr, tunnel->parms.o_key,
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -362,13 +347,17 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct ip_tunnel *nt;
struct net_device *dev;
int t_hlen;
+ int mtu;
+ int err;
- BUG_ON(!itn->fb_tunnel_dev);
- dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+ dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
if (IS_ERR(dev))
return ERR_CAST(dev);
- dev->mtu = ip_tunnel_bind_dev(dev);
+ mtu = ip_tunnel_bind_dev(dev);
+ err = dev_set_mtu(dev, mtu);
+ if (err)
+ goto err_dev_set_mtu;
nt = netdev_priv(dev);
t_hlen = nt->hlen + sizeof(struct iphdr);
@@ -376,6 +365,10 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
ip_tunnel_add(itn, nt);
return nt;
+
+err_dev_set_mtu:
+ unregister_netdevice(dev);
+ return ERR_PTR(err);
}
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
@@ -581,8 +574,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -710,16 +703,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- if (tunnel->fwmark) {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
- }
- else {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- skb->mark);
- }
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
@@ -845,7 +831,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
struct net *net = t->net;
struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
- BUG_ON(!itn->fb_tunnel_dev);
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == itn->fb_tunnel_dev) {
@@ -870,7 +855,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
p->o_key = 0;
}
- t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+ t = ip_tunnel_find(itn, p, itn->type);
if (cmd == SIOCADDTUNNEL) {
if (!t) {
@@ -1014,10 +999,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct ip_tunnel_parm parms;
unsigned int i;
+ itn->rtnl_link_ops = ops;
for (i = 0; i < IP_TNL_HASH_SIZE; i++)
INIT_HLIST_HEAD(&itn->tunnels[i]);
- if (!ops) {
+ if (!ops || !net_has_fallback_tunnels(net)) {
+ struct ip_tunnel_net *it_init_net;
+
+ it_init_net = net_generic(&init_net, ip_tnl_net_id);
+ itn->type = it_init_net->type;
itn->fb_tunnel_dev = NULL;
return 0;
}
@@ -1035,6 +1025,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ itn->type = itn->fb_tunnel_dev->type;
}
rtnl_unlock();
@@ -1042,10 +1033,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
+ struct list_head *head,
struct rtnl_link_ops *ops)
{
- struct net *net = dev_net(itn->fb_tunnel_dev);
struct net_device *dev, *aux;
int h;
@@ -1077,7 +1068,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(itn, &list, ops);
+ ip_tunnel_destroy(net, itn, &list, ops);
}
unregister_netdevice_many(&list);
rtnl_unlock();
@@ -1109,17 +1100,29 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
nt->fwmark = fwmark;
err = register_netdevice(dev);
if (err)
- goto out;
+ goto err_register_netdevice;
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
mtu = ip_tunnel_bind_dev(dev);
- if (!tb[IFLA_MTU])
- dev->mtu = mtu;
+ if (tb[IFLA_MTU]) {
+ unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;
+
+ mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
+ (unsigned int)(max - sizeof(struct iphdr)));
+ }
+
+ err = dev_set_mtu(dev, mtu);
+ if (err)
+ goto err_dev_set_mtu;
ip_tunnel_add(itn, nt);
-out:
+ return 0;
+
+err_dev_set_mtu:
+ unregister_netdevice(dev);
+err_register_netdevice:
return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 51b1669334fe..3f091ccad9af 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -387,8 +387,6 @@ static int vti_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
- dev->mtu = ETH_DATA_LEN;
dev->flags = IFF_NOARP;
dev->addr_len = 4;
dev->features |= NETIF_F_LLTX;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f75802ad960f..43f620feb1c4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1369,7 +1369,7 @@ static int __init ip_auto_config(void)
unsigned int i;
#ifdef CONFIG_PROC_FS
- proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops);
+ proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b05689bbba31..2fb4de3f7f66 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -28,9 +28,9 @@
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -52,7 +52,6 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
-#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
* In this case data path is free of exclusive locks at all.
*/
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
#define ipmr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv4.mr_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv4.mr_tables)
+ return NULL;
+ return ret;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv4.mrt;
+ return NULL;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
}
static const struct rhashtable_params ipmr_rht_params = {
- .head_offset = offsetof(struct mfc_cache, mnode),
+ .head_offset = offsetof(struct mr_mfc, mnode),
.key_offset = offsetof(struct mfc_cache, cmparg),
.key_len = sizeof(struct mfc_cache_cmp_arg),
.nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
.automatic_shrinking = true,
};
+static void ipmr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+ .rht_params = &ipmr_rht_params,
+ .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (mrt)
return mrt;
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return ERR_PTR(-ENOMEM);
- write_pnet(&mrt->net, net);
- mrt->id = id;
-
- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
- INIT_LIST_HEAD(&mrt->mfc_cache_list);
- INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
- mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
- return mrt;
+ return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+ ipmr_expire_process, ipmr_new_table_set);
}
static void ipmr_free_table(struct mr_table *mrt)
@@ -619,80 +644,22 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
}
#endif
-static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
- struct net *net,
- enum fib_event_type event_type,
- struct vif_device *vif,
- vifi_t vif_index, u32 tb_id)
-{
- struct vif_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .dev = vif->dev,
- .vif_index = vif_index,
- .vif_flags = vif->flags,
- .tb_id = tb_id,
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
static int call_ipmr_vif_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct vif_device *vif,
vifi_t vif_index, u32 tb_id)
{
- struct vif_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .dev = vif->dev,
- .vif_index = vif_index,
- .vif_flags = vif->flags,
- .tb_id = tb_id,
- };
-
- ASSERT_RTNL();
- net->ipv4.ipmr_seq++;
- return call_fib_notifiers(net, event_type, &info.info);
-}
-
-static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
- struct net *net,
- enum fib_event_type event_type,
- struct mfc_cache *mfc, u32 tb_id)
-{
- struct mfc_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .mfc = mfc,
- .tb_id = tb_id
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv4.ipmr_seq);
}
static int call_ipmr_mfc_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct mfc_cache *mfc, u32 tb_id)
{
- struct mfc_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .mfc = mfc,
- .tb_id = tb_id
- };
-
- ASSERT_RTNL();
- net->ipv4.ipmr_seq++;
- return call_fib_notifiers(net, event_type, &info.info);
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+ &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
}
/**
@@ -760,16 +727,15 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
- struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
- kmem_cache_free(mrt_cachep, c);
+ kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}
-void ipmr_cache_free(struct mfc_cache *c)
+static void ipmr_cache_free(struct mfc_cache *c)
{
- call_rcu(&c->rcu, ipmr_cache_free_rcu);
+ call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}
-EXPORT_SYMBOL(ipmr_cache_free);
/* Destroy an unresolved cache entry, killing queued skbs
* and reporting error to netlink readers.
@@ -782,7 +748,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
@@ -806,9 +772,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
static void ipmr_expire_process(struct timer_list *t)
{
struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
- unsigned long now;
+ struct mr_mfc *c, *next;
unsigned long expires;
- struct mfc_cache *c, *next;
+ unsigned long now;
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +796,8 @@ static void ipmr_expire_process(struct timer_list *t)
}
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
}
if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +808,7 @@ out:
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -944,6 +910,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit,
+ vifc->vifc_threshold,
+ vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+ (VIFF_TUNNEL | VIFF_REGISTER));
attr.orig_dev = dev;
if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +922,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
} else {
v->dev_parent_id.id_len = 0;
}
- v->rate_limit = vifc->vifc_rate_limit;
+
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
- v->flags = vifc->vifc_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
- v->link = dev_get_iflink(dev);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -988,33 +947,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- return c;
-
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
- int vifi)
-{
- struct mfc_cache_cmp_arg arg = {
- .mfc_mcastgrp = htonl(INADDR_ANY),
- .mfc_origin = htonl(INADDR_ANY)
- };
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
@@ -1025,25 +959,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = htonl(INADDR_ANY)
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
- goto skip;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
-
-skip:
- return ipmr_cache_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any(mrt, vifi, &arg);
}
/* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +974,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin,
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (parent == -1 || parent == c->mfc_parent)
- return c;
- return NULL;
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
/* Allocate a multicast cache entry */
@@ -1072,9 +984,10 @@ static struct mfc_cache *ipmr_cache_alloc(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c) {
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXVIFS;
- refcount_set(&c->mfc_un.res.refcount, 1);
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXVIFS;
+ c->_c.free = ipmr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
}
return c;
}
@@ -1084,8 +997,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c) {
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
}
return c;
}
@@ -1098,12 +1011,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct nlmsgerr *e;
/* Play the pending entries through our router */
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
@@ -1211,7 +1125,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
int err;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr) {
found = true;
@@ -1230,12 +1144,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
/* Fill in the new cache entry */
- c->mfc_parent = -1;
+ c->_c.mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
@@ -1248,15 +1163,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
- mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+ mod_timer(&mrt->ipmr_expire_timer,
+ c->_c.mfc_un.unres.expires);
}
/* See if we can append the packet */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
@@ -1264,7 +1180,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
skb->dev = dev;
skb->skb_iif = dev->ifindex;
}
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1286,11 +1202,11 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
rcu_read_unlock();
if (!c)
return -ENOENT;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+ list_del_rcu(&c->_c.list);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mr_cache_put(&c->_c);
return 0;
}
@@ -1299,6 +1215,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
struct mfcctl *mfc, int mrtsock, int parent)
{
struct mfc_cache *uc, *c;
+ struct mr_mfc *_uc;
bool found;
int ret;
@@ -1312,10 +1229,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
rcu_read_unlock();
if (c) {
write_lock_bh(&mrt_lock);
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
@@ -1333,28 +1250,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params);
if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c);
return ret;
}
- list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc_cache *)_uc;
if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
@@ -1377,7 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
struct net *net = read_pnet(&mrt->net);
- struct mfc_cache *c, *tmp;
+ struct mr_mfc *c, *tmp;
+ struct mfc_cache *cache;
LIST_HEAD(list);
int i;
@@ -1395,18 +1314,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
mrt->id);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
}
spin_unlock_bh(&mfc_unres_lock);
}
@@ -1420,7 +1341,7 @@ static void mrtsock_destruct(struct sock *sk)
struct net *net = sock_net(sk);
struct mr_table *mrt;
- ASSERT_RTNL();
+ rtnl_lock();
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1432,6 +1353,7 @@ static void mrtsock_destruct(struct sock *sk)
mroute_clean_tables(mrt, false);
}
}
+ rtnl_unlock();
}
/* Socket options and virtual interface manipulation. The whole
@@ -1496,8 +1418,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (sk != rcu_access_pointer(mrt->mroute_sk)) {
ret = -EACCES;
} else {
+ /* We need to unlock here because mrtsock_destruct takes
+ * care of rtnl itself and we can't change that due to
+ * the IP_ROUTER_ALERT setsockopt which runs without it.
+ */
+ rtnl_unlock();
ret = ip_ra_control(sk, 0, NULL);
- goto out_unlock;
+ goto out;
}
break;
case MRT_ADD_VIF:
@@ -1609,6 +1536,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
}
out_unlock:
rtnl_unlock();
+out:
return ret;
}
@@ -1698,9 +1626,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1700,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1926,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
- struct mfc_cache *cache, int local)
+ struct mfc_cache *c, int local)
{
int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
- vif = cache->mfc_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+ if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incomming
* interface is part of the static tree.
*/
- cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
}
@@ -2038,7 +1966,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto dont_forward;
}
- cache->mfc_un.res.wrong_if++;
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +1975,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
* large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
@@ -2061,33 +1990,33 @@ forward:
mrt->vif_table[vif].bytes_in += skb->len;
/* Forward the frame */
- if (cache->mfc_origin == htonl(INADDR_ANY) &&
- cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mfc_parent &&
+ true_vifi != c->_c.mfc_parent &&
ip_hdr(skb)->ttl >
- cache->mfc_un.res.ttls[cache->mfc_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mfc_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1;
- ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+ if ((c->mfc_origin != htonl(INADDR_ANY) ||
ct != true_vifi) &&
- ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+ ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, cache, psend);
+ skb2, c, psend);
}
psend = ct;
}
@@ -2099,9 +2028,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- cache, psend);
+ c, psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
return;
}
}
@@ -2299,62 +2228,6 @@ drop:
}
#endif
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
-
- if (c->mfc_flags & MFC_OFFLOAD)
- rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
- if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2285,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
read_lock(&mrt_lock);
- err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
@@ -2440,7 +2313,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2322,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
goto nla_put_failure;
- err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2462,6 +2335,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+ int flags)
+{
+ return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+ cmd, flags);
+}
+
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2490,7 +2371,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+ skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+ mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2634,62 +2516,8 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr_table *mrt;
- struct mfc_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_e = cb->args[1];
-
- rcu_read_lock();
- ipmr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = 0;
- s_e = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = 0;
- s_e = 0;
-next_table:
- t++;
- }
-done:
- rcu_read_unlock();
-
- cb->args[1] = e;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+ _ipmr_fill_mroute, &mfc_unres_lock);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2946,31 +2774,11 @@ out:
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
-{
- struct mr_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
-}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -2981,26 +2789,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2800,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2808,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const char *name = vif->dev ?
+ vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2823,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
- .next = ipmr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
.show = ipmr_vif_seq_show,
};
@@ -3041,7 +2831,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2841,8 @@ static const struct file_operations ipmr_vif_fops = {
.release = seq_release_net,
};
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
-{
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc;
-
- rcu_read_lock();
- it->cache = &mrt->mfc_cache_list;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- if (pos-- == 0)
- return mfc;
- rcu_read_unlock();
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
-
- it->cache = NULL;
- return NULL;
-}
-
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -3092,54 +2850,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc = v;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc_cache, list);
-
- if (it->cache == &mrt->mfc_unres_queue)
- goto end_of_list;
-
- /* exhausted cache_array, show unresolved */
- rcu_read_unlock();
- it->cache = &mrt->mfc_unres_queue;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_list)
- rcu_read_unlock();
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2862,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Group Origin Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
+ const struct mr_mfc_iter *it = seq->private;
const struct mr_table *mrt = it->mrt;
seq_printf(seq, "%08X %08X %-3hd",
(__force u32) mfc->mfc_mcastgrp,
(__force u32) mfc->mfc_origin,
- mfc->mfc_parent);
+ mfc->_c.mfc_parent);
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ n, mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -3185,15 +2896,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
@@ -3220,37 +2931,8 @@ static unsigned int ipmr_seq_read(struct net *net)
static int ipmr_dump(struct net *net, struct notifier_block *nb)
{
- struct mr_table *mrt;
- int err;
-
- err = ipmr_rules_dump(net, nb);
- if (err)
- return err;
-
- ipmr_for_each_table(mrt, net) {
- struct vif_device *v = &mrt->vif_table[0];
- struct mfc_cache *mfc;
- int vifi;
-
- /* Notifiy on table VIF entries */
- read_lock(&mrt_lock);
- for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
- if (!v->dev)
- continue;
-
- call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id);
- }
- read_unlock(&mrt_lock);
-
- /* Notify on table MFC entries */
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- call_ipmr_mfc_entry_notifier(nb, net,
- FIB_EVENT_ENTRY_ADD, mfc,
- mrt->id);
- }
-
- return 0;
+ return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
+ ipmr_mr_table_iter, &mrt_lock);
}
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644
index 000000000000..4fe97723b53f
--- /dev/null
+++ b/net/ipv4/ipmr_base.c
@@ -0,0 +1,365 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+ v->dev = NULL;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->rate_limit = rate_limit;
+ v->flags = flags;
+ v->threshold = threshold;
+ if (v->flags & get_iflink_mask)
+ v->link = dev_get_iflink(dev);
+ else
+ v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ struct mr_table *mrt;
+
+ mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+ if (!mrt)
+ return NULL;
+ mrt->id = id;
+ write_pnet(&mrt->net, net);
+
+ mrt->ops = *ops;
+ rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ INIT_LIST_HEAD(&mrt->mfc_cache_list);
+ INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+ timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+ mrt->mroute_reg_vif_num = -1;
+ table_set(mrt, net);
+ return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (parent == -1 || parent == c->mfc_parent)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+ *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c, *proxy;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ /* It's ok if the vifi is part of the static tree */
+ proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+ if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+ return c;
+ }
+
+ return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+ struct mr_table *mrt = iter->mrt;
+
+ for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = iter->mrt;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return mr_vif_seq_idx(net, iter, 0);
+
+ while (++iter->ct < mrt->maxvif) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ it->cache = &mrt->mfc_cache_list;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ if (pos-- == 0)
+ return mfc;
+ rcu_read_unlock();
+
+ spin_lock_bh(it->lock);
+ it->cache = &mrt->mfc_unres_queue;
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(it->lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *c = v;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return mr_mfc_seq_idx(net, seq->private, 0);
+
+ if (c->list.next != it->cache)
+ return list_entry(c->list.next, struct mr_mfc, list);
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ goto end_of_list;
+
+ /* exhausted cache_array, show unresolved */
+ rcu_read_unlock();
+ it->cache = &mrt->mfc_unres_queue;
+
+ spin_lock_bh(it->lock);
+ if (!list_empty(it->cache))
+ return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+ spin_unlock_bh(it->lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ unsigned long lastuse;
+ int ct;
+
+ /* If cache is unresolved, don't try to parse IIF and OIF */
+ if (c->mfc_parent >= MAXVIFS) {
+ rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+ return -ENOENT;
+ }
+
+ if (VIF_EXISTS(mrt, c->mfc_parent) &&
+ nla_put_u32(skb, RTA_IIF,
+ mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
+
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp_attr)
+ return -EMSGSIZE;
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+ struct vif_device *vif;
+
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (!nhp) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ vif = &mrt->vif_table[ct];
+ nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+
+ nla_nest_end(skb, mp_attr);
+
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+ RTA_PAD))
+ return -EMSGSIZE;
+
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+ struct mr_table *mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+ if (t < s_t)
+ goto next_table;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ if (e < s_e)
+ goto next_entry;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ goto done;
+next_entry:
+ e++;
+ }
+ e = 0;
+ s_e = 0;
+
+ spin_lock_bh(lock);
+ list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+ spin_unlock_bh(lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(lock);
+ e = 0;
+ s_e = 0;
+next_table:
+ t++;
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[1] = e;
+ cb->args[0] = t;
+
+ return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
+
+int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
+ int (*rules_dump)(struct net *net,
+ struct notifier_block *nb),
+ struct mr_table *(*mr_iter)(struct net *net,
+ struct mr_table *mrt),
+ rwlock_t *mrt_lock)
+{
+ struct mr_table *mrt;
+ int err;
+
+ err = rules_dump(net, nb);
+ if (err)
+ return err;
+
+ for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
+ struct vif_device *v = &mrt->vif_table[0];
+ struct mr_mfc *mfc;
+ int vifi;
+
+ /* Notifiy on table VIF entries */
+ read_lock(mrt_lock);
+ for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
+ if (!v->dev)
+ continue;
+
+ mr_call_vif_notifier(nb, net, family,
+ FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
+ }
+ read_unlock(mrt_lock);
+
+ /* Notify on table MFC entries */
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ mr_call_mfc_notifier(nb, net, family,
+ FIB_EVENT_ENTRY_ADD,
+ mfc, mrt->id);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(mr_dump);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index dfe6fa4ea554..280048e1e395 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV4
if NF_TABLES
config NF_TABLES_IPV4
- tristate "IPv4 nf_tables support"
+ bool "IPv4 nf_tables support"
help
This option enables the IPv4 support for nf_tables.
@@ -71,7 +71,7 @@ config NFT_FIB_IPV4
endif # NF_TABLES_IPV4
config NF_TABLES_ARP
- tristate "ARP nf_tables support"
+ bool "ARP nf_tables support"
select NETFILTER_FAMILY_ARP
help
This option enables the ARP support for nf_tables.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 2dad20eefd26..7523ddb2566b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
nf_nat_snmp_basic-y := nf_nat_snmp_basic-asn1.o nf_nat_snmp_basic_main.o
-nf_nat_snmp_basic-y : nf_nat_snmp_basic-asn1.h nf_nat_snmp_basic-asn1.c
+$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic-asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
clean-files := nf_nat_snmp_basic-asn1.c nf_nat_snmp_basic-asn1.h
@@ -39,7 +39,6 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
@@ -47,7 +46,6 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
-obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# flow table support
obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e3e420f3ba7b..2dc83de53f94 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -334,11 +334,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
* big jump.
*/
@@ -560,16 +555,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -781,7 +769,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ ret = xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -895,7 +885,7 @@ static int __do_replace(struct net *net, const char *name,
struct arpt_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -925,6 +915,8 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -939,7 +931,6 @@ static int __do_replace(struct net *net, const char *name,
net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1167,7 +1158,7 @@ static int translate_compat_table(struct xt_table_info **pinfo,
struct compat_arpt_entry *iter0;
struct arpt_replace repl;
unsigned int size;
- int ret = 0;
+ int ret;
info = *pinfo;
entry0 = *pentry0;
@@ -1176,7 +1167,9 @@ static int translate_compat_table(struct xt_table_info **pinfo,
j = 0;
xt_compat_lock(NFPROTO_ARP);
- xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+ ret = xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e38395a8dcf2..44b308d93ec2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -402,11 +402,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
big jump. */
do {
@@ -707,16 +702,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -945,7 +933,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(AF_INET, info->number);
+ ret = xt_compat_init_offsets(AF_INET, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1057,7 +1047,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ipt_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1087,6 +1077,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -1100,7 +1092,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1418,7 +1409,9 @@ translate_compat_table(struct net *net,
j = 0;
xt_compat_lock(AF_INET);
- xt_compat_init_offsets(AF_INET, compatr->num_entries);
+ ret = xt_compat_init_offsets(AF_INET, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4b02ab39ebc5..2c8d313ae216 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -232,7 +232,6 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
refcount_set(&c->refcount, 1);
- refcount_set(&c->entries, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
@@ -251,7 +250,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
- c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
+ c->pde = proc_create_data(buffer, 0600,
cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
@@ -263,8 +262,10 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
c->notifier.notifier_call = clusterip_netdev_event;
err = register_netdevice_notifier(&c->notifier);
- if (!err)
+ if (!err) {
+ refcount_set(&c->entries, 1);
return c;
+ }
#ifdef CONFIG_PROC_FS
proc_remove(c->pde);
@@ -273,7 +274,7 @@ err:
spin_lock_bh(&cn->lock);
list_del_rcu(&c->list);
spin_unlock_bh(&cn->lock);
- kfree(c);
+ clusterip_config_put(c);
return ERR_PTR(err);
}
@@ -496,12 +497,15 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return PTR_ERR(config);
}
}
- cipinfo->config = config;
ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0)
+ if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
+ clusterip_config_entry_put(par->net, config);
+ clusterip_config_put(config);
+ return ret;
+ }
if (!par->net->xt.clusterip_deprecated_warning) {
pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
@@ -509,6 +513,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
par->net->xt.clusterip_deprecated_warning = true;
}
+ cipinfo->config = config;
return ret;
}
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f75fc6b53115..690b17ef6a44 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -16,6 +16,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
static struct iphdr *
synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
@@ -384,6 +385,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
synproxy->isn = ntohl(th->ack_seq);
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
break;
case TCP_CONNTRACK_SYN_RECV:
if (!th->syn || !th->ack)
@@ -392,8 +395,10 @@ static unsigned int ipv4_synproxy_hook(void *priv,
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
- if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
opts.options &= ~(XT_SYNPROXY_OPT_MSS |
XT_SYNPROXY_OPT_WSCALE |
@@ -403,6 +408,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
synproxy_send_server_ack(net, state, skb, th, &opts);
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
swap(opts.tsval, opts.tsecr);
synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index a787d07f6cb7..7c6c20eaf4db 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -47,7 +47,7 @@ static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
pr_debug("Dropping evil AH tinygram.\n");
par->hotdrop = true;
- return 0;
+ return false;
}
return spi_match(ahinfo->spis[0], ahinfo->spis[1],
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index b50721d9d30e..9db988f9a4d7 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,8 +154,20 @@ static unsigned int ipv4_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+ if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *tmpl;
+
+ tmpl = nf_ct_get(skb, &ctinfo);
+ if (tmpl && nf_ct_is_template(tmpl)) {
+ /* when skipping ct, clear templates to avoid fooling
+ * later targets/matches
+ */
+ skb->_nfct = 0;
+ nf_ct_put(tmpl);
+ }
return NF_ACCEPT;
+ }
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index 25d2975da156..0cd46bffa469 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -111,6 +111,7 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
default:
return -1;
}
+ csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
@@ -185,7 +186,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index e9293bdebba0..4824b1e183a1 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -108,10 +108,12 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
int doff = 0;
if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
- struct udphdr _hdr, *hp;
+ struct tcphdr _hdr;
+ struct udphdr *hp;
hp = skb_header_pointer(skb, ip_hdrlen(skb),
- sizeof(_hdr), &_hdr);
+ iph->protocol == IPPROTO_UDP ?
+ sizeof(*hp) : sizeof(_hdr), &_hdr);
if (hp == NULL)
return NULL;
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
deleted file mode 100644
index 036c074736b0..000000000000
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/netfilter_arp.h>
-#include <net/netfilter/nf_tables.h>
-
-static unsigned int
-nft_do_chain_arp(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_unspec(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_arp = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_ARP,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_ARP_IN) |
- (1 << NF_ARP_OUT),
- .hooks = {
- [NF_ARP_IN] = nft_do_chain_arp,
- [NF_ARP_OUT] = nft_do_chain_arp,
- },
-};
-
-static int __init nf_tables_arp_init(void)
-{
- return nft_register_chain_type(&filter_arp);
-}
-
-static void __exit nf_tables_arp_exit(void)
-{
- nft_unregister_chain_type(&filter_arp);
-}
-
-module_init(nf_tables_arp_init);
-module_exit(nf_tables_arp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
deleted file mode 100644
index 96f955496d5f..000000000000
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-
-static unsigned int nft_do_chain_ipv4(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv4(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_ipv4 = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_IPV4,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_FORWARD) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING),
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
- [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
- [NF_INET_FORWARD] = nft_do_chain_ipv4,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
- },
-};
-
-static int __init nf_tables_ipv4_init(void)
-{
- return nft_register_chain_type(&filter_ipv4);
-}
-
-static void __exit nf_tables_ipv4_exit(void)
-{
- nft_unregister_chain_type(&filter_ipv4);
-}
-
-module_init(nf_tables_ipv4_init);
-module_exit(nf_tables_ipv4_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter");
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f2a490981594..b5464a3f253b 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -67,7 +67,17 @@ static unsigned int nft_nat_ipv4_local_fn(void *priv,
return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain);
}
-static const struct nf_chain_type nft_chain_nat_ipv4 = {
+static int nft_nat_ipv4_init(struct nft_ctx *ctx)
+{
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv4_free(struct nft_ctx *ctx)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static const struct nft_chain_type nft_chain_nat_ipv4 = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_IPV4,
@@ -82,15 +92,13 @@ static const struct nf_chain_type nft_chain_nat_ipv4 = {
[NF_INET_LOCAL_OUT] = nft_nat_ipv4_local_fn,
[NF_INET_LOCAL_IN] = nft_nat_ipv4_fn,
},
+ .init = nft_nat_ipv4_init,
+ .free = nft_nat_ipv4_free,
};
static int __init nft_chain_nat_init(void)
{
- int err;
-
- err = nft_register_chain_type(&nft_chain_nat_ipv4);
- if (err < 0)
- return err;
+ nft_register_chain_type(&nft_chain_nat_ipv4);
return 0;
}
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index d965c225b9f6..7d82934c46f4 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -58,7 +58,7 @@ static unsigned int nf_route_table_hook(void *priv,
return ret;
}
-static const struct nf_chain_type nft_chain_route_ipv4 = {
+static const struct nft_chain_type nft_chain_route_ipv4 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV4,
@@ -71,7 +71,9 @@ static const struct nf_chain_type nft_chain_route_ipv4 = {
static int __init nft_chain_route_init(void)
{
- return nft_register_chain_type(&nft_chain_route_ipv4);
+ nft_register_chain_type(&nft_chain_route_ipv4);
+
+ return 0;
}
static void __exit nft_chain_route_exit(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b8f0db54b197..05e47d777009 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1177,7 +1177,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = {
int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
{
struct proc_dir_entry *p;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
return -ENOMEM;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index dc5edc8f7564..a058de677e94 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,6 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem;
int orphans, sockets;
orphans = percpu_counter_sum_positive(&tcp_orphan_count);
@@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- frag_mem = ip_frag_mem(net);
- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG: inuse %u memory %lu\n",
+ atomic_read(&net->ipv4.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv4.frags));
return 0;
}
@@ -521,12 +521,12 @@ static const struct file_operations netstat_seq_fops = {
static __net_init int ip_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat", 0444, net->proc_net,
&sockstat_seq_fops))
goto out_sockstat;
- if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops))
+ if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops))
goto out_netstat;
- if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops))
+ if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops))
goto out_snmp;
return 0;
@@ -555,4 +555,3 @@ int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9b367fc48d7d..1b4d3355624a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout)
/*
* Raw sockets may have direct kernel references. Kill them.
*/
- rtnl_lock();
ip_ra_control(sk, 0, NULL);
- rtnl_unlock();
sk_common_release(sk);
}
@@ -1142,7 +1140,7 @@ static const struct file_operations raw_seq_fops = {
static __net_init int raw_init_net(struct net *net)
{
- if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops))
+ if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a4f44d815a61..ccb25d80f679 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -108,7 +108,6 @@
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
-#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
@@ -128,10 +127,11 @@ static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
-static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
+static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+
/*
* Interface to generic destination cache.
*/
@@ -378,12 +378,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
{
struct proc_dir_entry *pde;
- pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
+ pde = proc_create("rt_cache", 0444, net->proc_net,
&rt_cache_seq_fops);
if (!pde)
goto err1;
- pde = proc_create("rt_cache", S_IRUGO,
+ pde = proc_create("rt_cache", 0444,
net->proc_net_stat, &rt_cpu_seq_fops);
if (!pde)
goto err2;
@@ -633,6 +633,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
rt->rt_pmtu = fnhe->fnhe_pmtu;
+ rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
rt->dst.expires = fnhe->fnhe_expires;
if (fnhe->fnhe_gw) {
@@ -643,7 +644,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
}
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
- u32 pmtu, unsigned long expires)
+ u32 pmtu, bool lock, unsigned long expires)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
@@ -680,8 +681,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
- if (pmtu)
+ if (pmtu) {
fnhe->fnhe_pmtu = pmtu;
+ fnhe->fnhe_mtu_locked = lock;
+ }
fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -705,6 +708,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
+ fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = expires;
/* Exception created; mark the cached routes for the nexthop
@@ -786,7 +790,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, jiffies + ip_rt_gc_timeout);
+ 0, false,
+ jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -930,14 +935,23 @@ out_put_peer:
static int ip_error(struct sk_buff *skb)
{
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
struct rtable *rt = skb_rtable(skb);
+ struct net_device *dev = skb->dev;
+ struct in_device *in_dev;
struct inet_peer *peer;
unsigned long now;
struct net *net;
bool send;
int code;
+ if (netif_is_l3_master(skb->dev)) {
+ dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+ if (!dev)
+ goto out;
+ }
+
+ in_dev = __in_dev_get_rcu(dev);
+
/* IP on this device is disabled. */
if (!in_dev)
goto out;
@@ -999,15 +1013,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
struct fib_result res;
+ bool lock = false;
- if (dst_metric_locked(dst, RTAX_MTU))
+ if (ip_mtu_locked(dst))
return;
if (ipv4_mtu(dst) < mtu)
return;
- if (mtu < ip_rt_min_pmtu)
+ if (mtu < ip_rt_min_pmtu) {
+ lock = true;
mtu = ip_rt_min_pmtu;
+ }
if (rt->rt_pmtu == mtu &&
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -1017,7 +1034,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
- update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+ update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
jiffies + ip_rt_mtu_expires);
}
rcu_read_unlock();
@@ -1270,7 +1287,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = READ_ONCE(dst->dev->mtu);
- if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+ if (unlikely(ip_mtu_locked(dst))) {
if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
}
@@ -1383,7 +1400,7 @@ struct uncached_list {
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
-static void rt_add_uncached_list(struct rtable *rt)
+void rt_add_uncached_list(struct rtable *rt)
{
struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
@@ -1394,14 +1411,8 @@ static void rt_add_uncached_list(struct rtable *rt)
spin_unlock_bh(&ul->lock);
}
-static void ipv4_dst_destroy(struct dst_entry *dst)
+void rt_del_uncached_list(struct rtable *rt)
{
- struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
- struct rtable *rt = (struct rtable *) dst;
-
- if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
- kfree(p);
-
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;
@@ -1411,6 +1422,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
}
}
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
+ struct rtable *rt = (struct rtable *)dst;
+
+ if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
+ kfree(p);
+
+ rt_del_uncached_list(rt);
+}
+
void rt_flush_dev(struct net_device *dev)
{
struct net *net = dev_net(dev);
@@ -1506,9 +1528,9 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_is_input = 0;
rt->rt_iif = 0;
rt->rt_pmtu = 0;
+ rt->rt_mtu_locked = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
- rt->rt_table_id = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
@@ -1644,19 +1666,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_unlock_bh(&fnhe_lock);
}
-static void set_lwt_redirect(struct rtable *rth)
-{
- if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_output = rth->dst.output;
- rth->dst.output = lwtunnel_output;
- }
-
- if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_input = rth->dst.input;
- rth->dst.input = lwtunnel_input;
- }
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1739,15 +1748,13 @@ rt_cache:
}
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1763,44 +1770,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
+ const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
struct icmphdr _icmph;
- hash_keys->addrs.v4addrs.src = outer_iph->saddr;
- hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP))
- return;
+ goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- return;
+ goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- return;
+ goto out;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB)
- return;
+ goto out;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- return;
- hash_keys->addrs.v4addrs.src = inner_iph->saddr;
- hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ hash_keys->addrs.v4addrs.src = key_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
/* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
- struct net *net = fi->fib_net;
struct flow_keys hash_keys;
u32 mhash;
@@ -1824,15 +1832,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
+
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1848,17 +1861,17 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
return mhash >> 1;
}
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
@@ -1884,13 +1897,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
@@ -1949,6 +1963,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
@@ -1974,7 +1992,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
@@ -2016,8 +2034,6 @@ local_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res->type == RTN_UNREACHABLE) {
@@ -2246,8 +2262,6 @@ add:
return ERR_PTR(-ENOBUFS);
rth->rt_iif = orig_oif;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2269,7 +2283,7 @@ add:
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
return rth;
}
@@ -2282,13 +2296,14 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
const struct sk_buff *skb)
{
__u8 tos = RT_FL_TOS(fl4);
- struct fib_result res;
+ struct fib_result res = {
+ .type = RTN_UNSPEC,
+ .fi = NULL,
+ .table = NULL,
+ .tclassid = 0,
+ };
struct rtable *rth;
- res.tclassid = 0;
- res.fi = NULL;
- res.table = NULL;
-
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
@@ -2531,6 +2546,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
rt->rt_pmtu = ort->rt_pmtu;
+ rt->rt_mtu_locked = ort->rt_mtu_locked;
rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
@@ -2633,6 +2649,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
if (rt->rt_pmtu && expires)
metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+ if (rt->rt_mtu_locked && expires)
+ metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
@@ -2777,7 +2795,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
- table_id = rt->rt_table_id;
+ table_id = res.table ? res.table->tb_id : 0;
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
if (!res.fi) {
@@ -2818,6 +2836,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
+static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
void __user *buffer,
@@ -2933,7 +2952,8 @@ static struct ctl_table ipv4_route_table[] = {
.data = &ip_rt_min_pmtu,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &ip_min_valid_pmtu,
},
{
.procname = "min_adv_mss",
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index fda37f2862c9..c3387dfd725b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -349,6 +349,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = 0;
treq->tfo_listener = false;
+ if (IS_ENABLED(CONFIG_SMC))
+ ireq->smc_ok = 0;
ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 93e172118a94..4b195bac8ac0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
return ret;
}
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {
- .procname = "udp_rmem_min",
- .data = &sysctl_udp_rmem_min,
- .maxlen = sizeof(sysctl_udp_rmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
- {
- .procname = "udp_wmem_min",
- .data = &sysctl_udp_wmem_min,
- .maxlen = sizeof(sysctl_udp_wmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
{ }
};
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
+ {
+ .procname = "udp_rmem_min",
+ .data = &init_net.ipv4.sysctl_udp_rmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+ {
+ .procname = "udp_wmem_min",
+ .data = &init_net.ipv4.sysctl_udp_wmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 48636aee23c3..bccc4c270087 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);
@@ -484,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
}
}
+static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
+ int target, struct sock *sk)
+{
+ return (tp->rcv_nxt - tp->copied_seq >= target) ||
+ (sk->sk_prot->stream_memory_read ?
+ sk->sk_prot->stream_memory_read(sk) : false);
+}
+
/*
* Wait for a TCP event.
*
@@ -553,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data)
target++;
- if (tp->rcv_nxt - tp->copied_seq >= target)
+ if (tcp_stream_is_readable(tp, target, sk))
mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
@@ -897,7 +906,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
struct tcp_sock *tp = tcp_sk(sk);
u32 new_size_goal, size_goal;
- if (!large_allowed || !sk_can_gso(sk))
+ if (!large_allowed)
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
@@ -993,7 +1002,9 @@ new_segment:
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
skb->len += copy;
skb->data_len += copy;
@@ -1062,8 +1073,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
- if (!(sk->sk_route_caps & NETIF_F_SG) ||
- !sk_check_csum_caps(sk))
+ if (!(sk->sk_route_caps & NETIF_F_SG))
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
@@ -1102,27 +1112,11 @@ static int linear_payload_sz(bool first_skb)
return 0;
}
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- int tmp = tp->mss_cache;
-
- if (sg) {
- if (zc)
- return 0;
-
- if (sk_can_gso(sk)) {
- tmp = linear_payload_sz(first_skb);
- } else {
- int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
- if (tmp >= pgbreak &&
- tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
- tmp = pgbreak;
- }
- }
-
- return tmp;
+ if (zc)
+ return 0;
+ return linear_payload_sz(first_skb);
}
void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1181,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool process_backlog = false;
- bool sg, zc = false;
+ bool zc = false;
long timeo;
flags = msg->msg_flags;
@@ -1205,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto out_err;
}
- zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
}
@@ -1268,18 +1262,12 @@ restart:
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
while (msg_data_left(msg)) {
int copy = 0;
- int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (skb) {
- if (skb->ip_summed == CHECKSUM_NONE)
- max = mss_now;
- copy = max - skb->len;
- }
+ if (skb)
+ copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
@@ -1297,22 +1285,17 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- linear = select_size(sk, sg, first_skb, zc);
+ linear = select_size(first_skb, zc);
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_memory;
process_backlog = true;
- /*
- * Check whether we can use HW checksum.
- */
- if (sk_check_csum_caps(sk))
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
copy = size_goal;
- max = size_goal;
/* All packets are restored as if they have
* already been sent. skb_mstamp isn't set to
@@ -1343,7 +1326,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1396,7 +1379,7 @@ new_segment:
goto out;
}
- if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+ if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
if (forced_push(tp)) {
@@ -3058,8 +3041,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
- 3 * nla_total_size(sizeof(u32)) +
- 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+ 5 * nla_total_size(sizeof(u32)) +
+ 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -3088,6 +3071,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+
+ nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+ nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
return stats;
}
@@ -3566,6 +3553,7 @@ int tcp_abort(struct sock *sk, int err)
bh_unlock_sock(sk);
local_bh_enable();
+ tcp_write_queue_purge(sk);
release_sock(sk);
return 0;
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c..158d105e76da 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
- tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:5,
+ unused:12,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
sk->sk_pacing_rate = rate;
}
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
{
- struct bbr *bbr = inet_csk_ca(sk);
-
- return bbr->tso_segs_goal;
+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bbr *bbr = inet_csk_ca(sk);
- u32 min_segs;
+ u32 segs, bytes;
+
+ /* Sort of tcp_tso_autosize() but ignoring
+ * driver provided sk_gso_max_size.
+ */
+ bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
- 0x7FU);
+ return min(segs, 0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
- cwnd += 3 * bbr->tso_segs_goal;
+ cwnd += 3 * bbr_tso_segs_goal(sk);
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
@@ -730,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
+ tcp_sk(sk)->snd_ssthresh =
+ bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -824,7 +827,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
- bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
@@ -834,7 +836,7 @@ static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
@@ -887,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
- return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
+ return tcp_sk(sk)->snd_ssthresh;
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
@@ -936,7 +938,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .tso_segs_goal = bbr_tso_segs_goal,
+ .min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 7c843578f233..faddf4f9a707 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -6,7 +6,7 @@
* The algorithm is described in:
* "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
* for High-Speed Networks"
- * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
+ * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf
*
* Implemented from description in paper and ns-2 simulation.
* Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 575d3c1fb6e8..367def6ddeda 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
int len;
int in_sack;
- if (!sk_can_gso(sk))
- goto fallback;
-
/* Normally R but no L won't result in plain S */
if (!dup_sack &&
(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
@@ -1971,11 +1968,6 @@ void tcp_enter_loss(struct sock *sk)
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
- *
- * In theory F-RTO can be used repeatedly during loss recovery.
- * In practice this interacts badly with broken middle-boxes that
- * falsely raise the receive window, which results in repeated
- * timeouts and stop-and-go behavior.
*/
tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
@@ -2631,18 +2623,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
tcp_try_undo_loss(sk, false))
return;
- /* The ACK (s)acks some never-retransmitted data meaning not all
- * the data packets before the timeout were lost. Therefore we
- * undo the congestion window and state. This is essentially
- * the operation in F-RTO (RFC5682 section 3.1 step 3.b). Since
- * a retransmitted skb is permantly marked, we can apply such an
- * operation even if F-RTO was not used.
- */
- if ((flag & FLAG_ORIG_SACK_ACKED) &&
- tcp_try_undo_loss(sk, tp->undo_marker))
- return;
-
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+ /* Step 3.b. A timeout is spurious if not all data are
+ * lost, i.e., never-retransmitted data are (s)acked.
+ */
+ if ((flag & FLAG_ORIG_SACK_ACKED) &&
+ tcp_try_undo_loss(sk, true))
+ return;
+
if (after(tp->snd_nxt, tp->high_seq)) {
if (flag & FLAG_DATA_SACKED || is_dupack)
tp->frto = 0; /* Step 3.a. loss was real */
@@ -4001,6 +3989,7 @@ void tcp_reset(struct sock *sk)
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
+ tcp_write_queue_purge(sk);
tcp_done(sk);
if (!sock_flag(sk, SOCK_DEAD))
@@ -5870,10 +5859,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
+ bool req_stolen;
+
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen))
goto discard;
}
@@ -6264,6 +6255,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
+ if (IS_ENABLED(CONFIG_SMC) && want_cookie)
+ tmp_opt.smc_ok = 0;
+
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8ad397e285e..f70586b50838 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
+static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from tcp_v4_connect() and intended to
+ * prevent BPF program called below from accessing bytes that are out
+ * of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ sock_owned_by_me(sk);
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
+}
+
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -561,16 +576,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct tcphdr *th = tcp_hdr(skb);
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v4_check(skb->len, saddr, daddr,
- csum_partial(th,
- th->doff << 2,
- skb->csum));
- }
+ th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
}
/* This routine computes an IPv4 TCP checksum. */
@@ -1672,6 +1680,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1694,10 +1703,20 @@ process:
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v4_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
@@ -2211,7 +2230,7 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
afinfo->seq_ops.next = tcp_seq_next;
afinfo->seq_ops.stop = tcp_seq_stop;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
rc = -ENOMEM;
@@ -2404,6 +2423,7 @@ struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
+ .pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..57b5468b5139 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -332,6 +332,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcp_update_metrics(sk);
tcp_done(sk);
}
+EXPORT_SYMBOL(tcp_time_wait);
void tcp_twsk_destructor(struct sock *sk)
{
@@ -578,7 +579,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- bool fastopen)
+ bool fastopen, bool *req_stolen)
{
struct tcp_options_received tmp_opt;
struct sock *child;
@@ -785,6 +786,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
+ *req_stolen = !own_req;
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6818042cd8a9..383cac0ff0ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
- if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+ if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
tcp_skb_fragment_eor(skb, buff);
- if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
- /* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy_nocheck(skb->data + len,
- skb_put(buff, nsize),
- nsize, 0);
-
- skb_trim(skb, len);
-
- skb->csum = csum_block_sub(skb->csum, buff->csum, len);
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb_split(skb, buff, len);
- }
+ skb_split(skb, buff, len);
- buff->ip_summed = skb->ip_summed;
+ buff->ip_summed = CHECKSUM_PARTIAL;
buff->tstamp = skb->tstamp;
tcp_fragment_tstamp(skb, buff);
@@ -1715,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
@@ -1732,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
return segs;
}
-EXPORT_SYMBOL(tcp_tso_autosize);
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
@@ -1740,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ u32 min_tso, tso_segs;
- if (!tso_segs)
- tso_segs = tcp_tso_autosize(sk, mss_now,
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
@@ -1902,7 +1891,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
tcp_skb_fragment_eor(skb, buff);
- buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+ buff->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
@@ -2135,7 +2124,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
- nskb->ip_summed = skb->ip_summed;
+ nskb->ip_summed = CHECKSUM_PARTIAL;
tcp_insert_write_queue_before(nskb, skb, sk);
tcp_highest_sack_replace(sk, skb, nskb);
@@ -2143,14 +2132,7 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed) {
- skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- } else {
- __wsum csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, 0);
- nskb->csum = csum_block_add(nskb->csum, csum, len);
- }
+ skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
@@ -2167,9 +2149,6 @@ static int tcp_mtu_probe(struct sock *sk)
~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_partial(skb->data,
- skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
tcp_set_skb_tso_segs(skb, mss_now);
@@ -2747,12 +2726,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
}
tcp_highest_sack_replace(sk, next_skb, skb);
- if (next_skb->ip_summed == CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
/* Update sequence range on original skb. */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 71fc60f1b326..f7d944855f8e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -34,6 +34,7 @@ static void tcp_write_err(struct sock *sk)
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
sk->sk_error_report(sk);
+ tcp_write_queue_purge(sk);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e5ef7c38c934..24b5c59b1c53 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
@@ -1664,6 +1658,19 @@ csum_copy_err:
goto try_again;
}
+int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ /* This check is replicated from __ip4_datagram_connect() and
+ * intended to prevent BPF program called below from accessing bytes
+ * that are out of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+}
+EXPORT_SYMBOL(udp_pre_connect);
+
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -2533,35 +2540,36 @@ int udp_abort(struct sock *sk, int err)
EXPORT_SYMBOL_GPL(udp_abort);
struct proto udp_prot = {
- .name = "UDP",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip4_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udp_destroy_sock,
- .setsockopt = udp_setsockopt,
- .getsockopt = udp_getsockopt,
- .sendmsg = udp_sendmsg,
- .recvmsg = udp_recvmsg,
- .sendpage = udp_sendpage,
- .release_cb = ip4_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v4_rehash,
- .get_port = udp_v4_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp_sock),
- .h.udp_table = &udp_table,
+ .name = "UDP",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .pre_connect = udp_pre_connect,
+ .connect = ip4_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udp_destroy_sock,
+ .setsockopt = udp_setsockopt,
+ .getsockopt = udp_getsockopt,
+ .sendmsg = udp_sendmsg,
+ .recvmsg = udp_recvmsg,
+ .sendpage = udp_sendpage,
+ .release_cb = ip4_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v4_rehash,
+ .get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udp_setsockopt,
- .compat_getsockopt = compat_udp_getsockopt,
+ .compat_setsockopt = compat_udp_setsockopt,
+ .compat_getsockopt = compat_udp_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
@@ -2679,7 +2687,7 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
afinfo->seq_ops.next = udp_seq_next;
afinfo->seq_ops.stop = udp_seq_stop;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
rc = -ENOMEM;
@@ -2830,6 +2838,26 @@ u32 udp_flow_hashrnd(void)
}
EXPORT_SYMBOL(udp_flow_hashrnd);
+static void __udp_sysctl_init(struct net *net)
+{
+ net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+ net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+ __udp_sysctl_init(net);
+ return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+ .init = udp_sysctl_init,
+};
+
void __init udp_init(void)
{
unsigned long limit;
@@ -2842,8 +2870,7 @@ void __init udp_init(void)
sysctl_udp_mem[1] = limit;
sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
- sysctl_udp_rmem_min = SK_MEM_QUANTUM;
- sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+ __udp_sysctl_init(&init_net);
/* 16 spinlocks per cpu */
udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2853,4 +2880,7 @@ void __init udp_init(void)
panic("UDP: failed to alloc udp_busylocks\n");
for (i = 0; i < (1U << udp_busylocks_log); i++)
spin_lock_init(udp_busylocks + i);
+
+ if (register_pernet_subsys(&udp_sysctl_ops))
+ panic("UDP: failed to init sysctl parameters.\n");
}
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 63faeee989a9..2a9764bd1719 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,7 +92,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
- eth_hdr(skb)->h_proto = skb->protocol;
+ if (skb->mac_len)
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 94b8702603bc..be980c195fc5 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,7 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
mtu = dst_mtu(skb_dst(skb));
if ((!skb_is_gso(skb) && skb->len > mtu) ||
- (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) {
+ (skb_is_gso(skb) &&
+ !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
skb->protocol = htons(ETH_P_IP);
if (skb->sk)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 05017e2c849c..d73a6d6652f6 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -100,8 +100,9 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
- xdst->u.rt.rt_table_id = rt->rt_table_id;
+ xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
+ rt_add_uncached_list(&xdst->u.rt);
return 0;
}
@@ -241,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
dst_destroy_metrics_generic(dst);
-
+ if (xdst->u.rt.rt_uncached_list)
+ rt_del_uncached_list(&xdst->u.rt);
xfrm_dst_destroy(xdst);
}
@@ -379,4 +381,3 @@ void __init xfrm4_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1846b97ee69..78cef00c9596 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -94,15 +94,6 @@
#include <linux/seq_file.h>
#include <linux/export.h>
-/* Set to 3 to get tracing... */
-#define ACONF_DEBUG 2
-
-#if ACONF_DEBUG >= 3
-#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#else
-#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
#define INFINITY_LIFE_TIME 0xFFFFFFFF
#define IPV6_MAX_STRLEN \
@@ -409,9 +400,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+ __func__);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -419,9 +409,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+ __func__, dev->name);
goto err_release;
}
@@ -984,7 +973,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
- ADBG("ipv6_add_addr: already assigned\n");
+ netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
@@ -1044,7 +1033,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa = kzalloc(sizeof(*ifa), gfp_flags);
if (!ifa) {
- ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -1459,6 +1447,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1836,22 +1839,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
- return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+ return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+ strict, IFA_F_TENTATIVE);
}
EXPORT_SYMBOL(ipv6_chk_addr);
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for addresses checks are:
+ * 1. does the address exist in the L3 domain that dev is part of
+ * (skip_dev_check = true), or
+ *
+ * 2. does the address exist on the specific device
+ * (skip_dev_check = false)
+ */
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags)
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags)
{
unsigned int hash = inet6_addr_hash(net, addr);
+ const struct net_device *l3mdev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
rcu_read_lock();
+
+ l3mdev = l3mdev_master_dev_rcu(dev);
+ if (skip_dev_check)
+ dev = NULL;
+
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
+
+ if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+ continue;
+
/* Decouple optimistic from tentative for evaluation here.
* Ban optimistic addresses explicitly, when required.
*/
@@ -1968,6 +1991,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -2581,7 +2606,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG("addrconf: prefix option too short\n");
+ netdev_dbg(dev, "addrconf: prefix option too short\n");
return;
}
@@ -4244,7 +4269,7 @@ static const struct file_operations if6_fops = {
static int __net_init if6_proc_net_init(struct net *net)
{
- if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
+ if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
return -ENOMEM;
return 0;
}
@@ -4408,8 +4433,8 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched);
+ pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
@@ -4500,6 +4525,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -4573,6 +4601,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
@@ -4606,7 +4635,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 416917719a6f..8da0b513f188 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -277,15 +277,7 @@ out_rcu_unlock:
/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct net *net = sock_net(sk);
- __be32 v4addr = 0;
- unsigned short snum;
- bool saved_ipv6only;
- int addr_type = 0;
int err = 0;
/* If the socket has its own bind function then use it. */
@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet6_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet6_bind);
+
+int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ __be32 v4addr = 0;
+ unsigned short snum;
+ bool saved_ipv6only;
+ int addr_type = 0;
+ int err = 0;
+
if (addr->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
addr_type = ipv6_addr_type(&addr->sin6_addr);
- if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+ if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
return -EINVAL;
snum = ntohs(addr->sin6_port);
@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- sk->sk_ipv6only = saved_ipv6only;
- inet_reset_saddr(sk);
- err = -EADDRINUSE;
- goto out;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ err = -EADDRINUSE;
+ goto out;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+ if (err) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ goto out;
+ }
}
if (addr_type != IPV6_ADDR_ANY)
@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = 0;
inet->inet_daddr = 0;
out:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
return err;
out_unlock:
rcu_read_unlock();
goto out;
}
-EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
{
@@ -470,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
*/
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
@@ -500,8 +525,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
@@ -869,6 +893,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
};
+static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
+ .inet6_bind = __inet6_bind,
+};
+
static int __init inet6_init(void)
{
struct list_head *r;
@@ -1025,6 +1053,7 @@ static int __init inet6_init(void)
/* ensure that ipv6 stubs are visible only after ipv6 is ready */
wmb();
ipv6_stub = &ipv6_stub_impl;
+ ipv6_bpf_stub = &ipv6_bpf_stub_impl;
out:
return err;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..bbcabbba9bd8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
- if (ipv6_chk_addr(net, addr, NULL, 0))
+
+ if (ifindex)
+ dev = __dev_get_by_index(net, ifindex);
+
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -78,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
- } else
- dev = __dev_get_by_index(net, ifindex);
+ }
if (!dev) {
err = -ENODEV;
@@ -541,7 +544,7 @@ static const struct file_operations ac6_seq_fops = {
int __net_init ac6_proc_init(struct net *net)
{
- if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
+ if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
return -ENOMEM;
return 0;
@@ -552,4 +555,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fbf08ce3f5ab..a02ad100f0d7 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -106,14 +106,7 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
}
}
- ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
- &sk->sk_v6_daddr : NULL,
-#ifdef CONFIG_IPV6_SUBTREES
- ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
- &np->saddr :
-#endif
- NULL);
+ ip6_sk_dst_store_flow(sk, dst, &fl6);
out:
fl6_sock_release(flowlabel);
@@ -146,10 +139,12 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_addr *daddr;
+ struct in6_addr *daddr, old_daddr;
+ __be32 fl6_flowlabel = 0;
+ __be32 old_fl6_flowlabel;
+ __be16 old_dport;
int addr_type;
int err;
- __be32 fl6_flowlabel = 0;
if (usin->sin6_family == AF_INET) {
if (__ipv6_only_sock(sk))
@@ -238,9 +233,13 @@ ipv4_connected:
}
}
+ /* save the current peer information before updating it */
+ old_daddr = sk->sk_v6_daddr;
+ old_fl6_flowlabel = np->flow_label;
+ old_dport = inet->inet_dport;
+
sk->sk_v6_daddr = *daddr;
np->flow_label = fl6_flowlabel;
-
inet->inet_dport = usin->sin6_port;
/*
@@ -250,11 +249,12 @@ ipv4_connected:
err = ip6_datagram_dst_update(sk, true);
if (err) {
- /* Reset daddr and dport so that udp_v6_early_demux()
- * fails to find this socket
+ /* Restore the socket peer info, to keep it consistent with
+ * the old socket state
*/
- memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
- inet->inet_dport = 0;
+ sk->sk_v6_daddr = old_daddr;
+ np->flow_label = old_fl6_flowlabel;
+ inet->inet_dport = old_dport;
goto out;
}
@@ -801,8 +801,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
- !ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0) &&
+ !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 3fd1ec775dc2..27f59b61f70f 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -165,6 +165,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+ else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+ esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24a6e52..df113c7b5fc8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
+ .lookup_data = skb,
.flags = FIB_LOOKUP_NOREF,
};
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, flags);
+ rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
@@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
@@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
@@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..d8c4b6374377 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+ skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92b8d8c75eed..deab2db6692e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
@@ -1006,12 +1007,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
+ if (err)
+ return err;
+
rcu_assign_pointer(rt->rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1035,12 +1040,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_REPLACE,
+ rt, extack);
+ if (err)
+ return err;
+
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dab664ff503..c05c4e82a7ca 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -844,7 +844,7 @@ static const struct file_operations ip6fl_seq_fops = {
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+ if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
&ip6fl_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c353125546d..69727bc168cb 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -126,7 +126,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
struct ip6_tnl *t, *cand = NULL;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int dev_type = (gre_proto == htons(ETH_P_TEB) ||
- gre_proto == htons(ETH_P_ERSPAN)) ?
+ gre_proto == htons(ETH_P_ERSPAN) ||
+ gre_proto == htons(ETH_P_ERSPAN2)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
@@ -236,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
return t;
dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
+ if (dev && dev->flags & IFF_UP)
return netdev_priv(dev);
return NULL;
@@ -334,11 +335,13 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
if (t || !create)
return t;
- if (parms->name[0])
+ if (parms->name[0]) {
+ if (!dev_valid_name(parms->name))
+ return NULL;
strlcpy(name, parms->name, IFNAMSIZ);
- else
+ } else {
strcpy(name, "ip6gre%d");
-
+ }
dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
if (!dev)
@@ -695,9 +698,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
@@ -720,14 +720,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ : 0);
} else {
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
@@ -902,6 +908,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
truncate = true;
}
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto tx_err;
+
t->parms.o_flags &= ~TUNNEL_KEY;
IPCB(skb)->flags = 0;
@@ -944,6 +953,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
md->u.md2.dir,
get_hwid(&md->u.md2),
truncate, false);
+ } else {
+ goto tx_err;
}
} else {
switch (skb->protocol) {
@@ -1053,7 +1064,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -1469,6 +1480,8 @@ static int __net_init ip6gre_init_net(struct net *net)
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int err;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
@@ -1751,7 +1764,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
dev->mtu -= 8;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
ip6gre_tnl_link_config(tunnel, 1);
return 0;
@@ -1784,6 +1796,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 997c7f19ad62..2e891d2c30ef 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
@@ -375,6 +375,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
+ struct dst_entry *dst = skb_dst(skb);
+
+ __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+
return dst_output(net, sk, skb);
}
@@ -412,7 +417,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
@@ -569,8 +574,6 @@ int ip6_forward(struct sk_buff *skb)
hdr->hop_limit--;
- __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
net, NULL, skb, skb->dev, dst->dev,
ip6_forward_finish);
@@ -1105,23 +1108,32 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
* @sk: socket which provides the dst cache and route info
* @fl6: flow to lookup
* @final_dst: final destination address for ipsec lookup
+ * @connected: whether @sk is connected or not
*
* This function performs a route lookup on the given flow with the
* possibility of using the cached route in the socket if it is valid.
* It will take the socket dst lock when operating on the dst cache.
* As a result, this function can only be used in process context.
*
+ * In addition, for a connected socket, cache the dst in the socket
+ * if the current cache is not valid.
+ *
* It returns a valid dst pointer on success, or a pointer encoded
* error code.
*/
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
- const struct in6_addr *final_dst)
+ const struct in6_addr *final_dst,
+ bool connected)
{
struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
dst = ip6_sk_dst_check(sk, dst, fl6);
- if (!dst)
- dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
+ if (dst)
+ return dst;
+
+ dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
+ if (connected && !IS_ERR(dst))
+ ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
return dst;
}
@@ -1246,7 +1258,7 @@ static int __ip6_append_data(struct sock *sk,
const struct sockcm_cookie *sockc)
{
struct sk_buff *skb, *skb_prev = NULL;
- unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
+ unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
int exthdrlen = 0;
int dst_exthdrlen = 0;
int hh_len;
@@ -1259,6 +1271,7 @@ static int __ip6_append_data(struct sock *sk,
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
+ unsigned int wmem_alloc_delta = 0;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1282,6 +1295,12 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
+ /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
+ * the first fragment
+ */
+ if (headersize + transhdrlen > mtu)
+ goto emsgsize;
+
if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_RAW)) {
@@ -1297,9 +1316,8 @@ static int __ip6_append_data(struct sock *sk,
if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
- ipv6_local_error(sk, EMSGSIZE, fl6,
- mtu - headersize +
- sizeof(struct ipv6hdr));
+ pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
+ ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
return -EMSGSIZE;
}
@@ -1411,11 +1429,10 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (refcount_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = sock_wmalloc(sk,
- alloclen + hh_len, 1,
- sk->sk_allocation);
+ skb = alloc_skb(alloclen + hh_len,
+ sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
}
@@ -1474,6 +1491,11 @@ alloc_new_skb:
/*
* Put the packet on the pending queue
*/
+ if (!skb->destructor) {
+ skb->destructor = sock_wfree;
+ skb->sk = sk;
+ wmem_alloc_delta += skb->truesize;
+ }
__skb_queue_tail(queue, skb);
continue;
}
@@ -1520,12 +1542,14 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
+ wmem_alloc_delta += copy;
}
offset += copy;
length -= copy;
}
+ if (wmem_alloc_delta)
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return 0;
error_efault:
@@ -1533,6 +1557,7 @@ error_efault:
error:
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return err;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4b15fe928278..da66aaac51ce 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -297,13 +297,16 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
struct net_device *dev;
struct ip6_tnl *t;
char name[IFNAMSIZ];
- int err = -ENOMEM;
+ int err = -E2BIG;
- if (p->name[0])
+ if (p->name[0]) {
+ if (!dev_valid_name(p->name))
+ goto failed;
strlcpy(name, p->name, IFNAMSIZ);
- else
+ } else {
sprintf(name, "ip6tnl%%d");
-
+ }
+ err = -ENOMEM;
dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
ip6_tnl_dev_setup);
if (!dev)
@@ -679,7 +682,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
- NULL, 0, 0);
+ NULL, 0, skb2, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -758,9 +761,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(laddr) ||
- likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE))) &&
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
- likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+ likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+ 0, IFA_F_TENTATIVE))))
ret = 1;
}
return ret;
@@ -990,12 +995,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
- unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
@@ -1444,7 +1451,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -1982,14 +1989,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
{
struct net *net = dev_net(dev);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- struct ip6_tnl *nt, *t;
struct ip_tunnel_encap ipencap;
+ struct ip6_tnl *nt, *t;
+ int err;
nt = netdev_priv(dev);
if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
- int err = ip6_tnl_encap_setup(nt, &ipencap);
-
+ err = ip6_tnl_encap_setup(nt, &ipencap);
if (err < 0)
return err;
}
@@ -2005,7 +2012,11 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
return -EEXIST;
}
- return ip6_tnl_create2(dev);
+ err = ip6_tnl_create2(dev);
+ if (!err && tb[IFLA_MTU])
+ ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
+ return err;
}
static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -2201,6 +2212,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index fa3ae1cb50d3..c214ffec02f0 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -212,10 +212,13 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
char name[IFNAMSIZ];
int err;
- if (p->name[0])
+ if (p->name[0]) {
+ if (!dev_valid_name(p->name))
+ goto failed;
strlcpy(name, p->name, IFNAMSIZ);
- else
+ } else {
sprintf(name, "ip6_vti%%d");
+ }
dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup);
if (!dev)
@@ -622,11 +625,12 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
-static void vti6_link_config(struct ip6_tnl *t)
+static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
{
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
struct net_device *tdev = NULL;
+ int mtu;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -640,12 +644,17 @@ static void vti6_link_config(struct ip6_tnl *t)
else
dev->flags &= ~IFF_POINTOPOINT;
+ if (keep_mtu && dev->mtu) {
+ dev->mtu = clamp(dev->mtu, dev->min_mtu, dev->max_mtu);
+ return;
+ }
+
if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (rt)
tdev = rt->dst.dev;
@@ -656,20 +665,25 @@ static void vti6_link_config(struct ip6_tnl *t)
tdev = __dev_get_by_index(t->net, p->link);
if (tdev)
- dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
- IPV6_MIN_MTU);
+ mtu = tdev->mtu - sizeof(struct ipv6hdr);
+ else
+ mtu = ETH_DATA_LEN - LL_MAX_HEADER - sizeof(struct ipv6hdr);
+
+ dev->mtu = max_t(int, mtu, IPV6_MIN_MTU);
}
/**
* vti6_tnl_change - update the tunnel parameters
* @t: tunnel to be changed
* @p: tunnel configuration parameters
+ * @keep_mtu: MTU was set from userspace, don't re-compute it
*
* Description:
* vti6_tnl_change() updates the tunnel parameters
**/
static int
-vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
+vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
+ bool keep_mtu)
{
t->parms.laddr = p->laddr;
t->parms.raddr = p->raddr;
@@ -679,11 +693,12 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.proto = p->proto;
t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
- vti6_link_config(t);
+ vti6_link_config(t, keep_mtu);
return 0;
}
-static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p,
+ bool keep_mtu)
{
struct net *net = dev_net(t->dev);
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
@@ -691,7 +706,7 @@ static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
vti6_tnl_unlink(ip6n, t);
synchronize_net();
- err = vti6_tnl_change(t, p);
+ err = vti6_tnl_change(t, p, keep_mtu);
vti6_tnl_link(ip6n, t);
netdev_state_change(t->dev);
return err;
@@ -804,7 +819,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
} else
t = netdev_priv(dev);
- err = vti6_update(t, &p1);
+ err = vti6_update(t, &p1, false);
}
if (t) {
err = 0;
@@ -866,10 +881,8 @@ static void vti6_dev_setup(struct net_device *dev)
dev->priv_destructor = vti6_dev_free;
dev->type = ARPHRD_TUNNEL6;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
- dev->mtu = ETH_DATA_LEN;
dev->min_mtu = IPV6_MIN_MTU;
- dev->max_mtu = IP_MAX_MTU;
+ dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
netif_keep_dst(dev);
@@ -905,7 +918,7 @@ static int vti6_dev_init(struct net_device *dev)
if (err)
return err;
- vti6_link_config(t);
+ vti6_link_config(t, true);
return 0;
}
@@ -1010,7 +1023,7 @@ static int vti6_changelink(struct net_device *dev, struct nlattr *tb[],
} else
t = netdev_priv(dev);
- return vti6_update(t, &p);
+ return vti6_update(t, &p, tb && tb[IFLA_MTU]);
}
static size_t vti6_get_size(const struct net_device *dev)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9f6cace9c817..298fd8b6ed17 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -32,11 +31,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
@@ -54,30 +51,12 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
{
- struct mr6_table *mrt;
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -268,17 +258,42 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
fib_rules_unregister(net->ipv6.mr6_rules_ops);
rtnl_unlock();
}
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
+}
+
+bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+ return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
+ rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
+}
+EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
@@ -297,114 +312,87 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
net->ipv6.mrt6 = NULL;
rtnl_unlock();
}
-#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
- struct mr6_table *mrt;
- unsigned int i;
-
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
+ return 0;
+}
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+#endif
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
-}
-
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
}
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
+ struct mr_table *mrt;
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -413,26 +401,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +412,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
@@ -465,7 +434,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
@@ -473,7 +442,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +454,14 @@ static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +475,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -597,15 +508,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +535,7 @@ static int pim6_rcv(struct sk_buff *skb)
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -658,7 +569,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -693,7 +604,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +647,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
@@ -769,21 +680,41 @@ failure:
}
#endif
-/*
- * Delete a VIF entry
- */
+static int call_ip6mr_vif_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ mifi_t vif_index, u32 tb_id)
+{
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv6.ipmr_seq);
+}
+
+static int call_ip6mr_mfc_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct mfc6_cache *mfc, u32 tb_id)
+{
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
+}
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+/* Delete a VIF entry */
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
+
+ if (VIF_EXISTS(mrt, vifi))
+ call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_VIF_DEL, v, vifi,
+ mrt->id);
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -802,7 +733,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
@@ -827,23 +758,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
@@ -862,13 +800,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +816,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
@@ -903,7 +841,8 @@ static void ipmr_expire_process(struct timer_list *t)
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -913,7 +852,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +864,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
@@ -980,21 +919,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -1006,78 +934,63 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
if (vifi + 1 > mrt->maxvif)
mrt->maxvif = vifi + 1;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
+ c->_c.free = ip6mr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
return c;
}
@@ -1086,8 +999,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
@@ -1095,7 +1008,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
@@ -1104,12 +1017,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +1043,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
@@ -1201,17 +1116,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1220,19 +1137,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
@@ -1253,10 +1167,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
@@ -1276,20 +1188,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1301,29 +1211,26 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL, c, mrt->id);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ mr_cache_put(&c->_c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1238,15 @@ static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
@@ -1349,21 +1256,63 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
+static unsigned int ip6mr_seq_read(struct net *net)
+{
+ ASSERT_RTNL();
+
+ return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+}
+
+static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+{
+ return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
+ ip6mr_mr_table_iter, &mrt_lock);
+}
+
static struct notifier_block ip6_mr_notifier = {
.notifier_call = ip6mr_device_event
};
-/*
- * Setup for IP multicast routing
- */
+static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
+ .family = RTNL_FAMILY_IP6MR,
+ .fib_seq_read = ip6mr_seq_read,
+ .fib_dump = ip6mr_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ip6mr_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ net->ipv6.ipmr_seq = 0;
+
+ ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ net->ipv6.ip6mr_notifier_ops = ops;
+
+ return 0;
+}
+static void __net_exit ip6mr_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
+ net->ipv6.ip6mr_notifier_ops = NULL;
+}
+
+/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
int err;
+ err = ip6mr_notifier_init(net);
+ if (err)
+ return err;
+
err = ip6mr_rules_init(net);
if (err < 0)
- goto fail;
+ goto ip6mr_rules_fail;
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
@@ -1381,7 +1330,8 @@ proc_cache_fail:
proc_vif_fail:
ip6mr_rules_exit(net);
#endif
-fail:
+ip6mr_rules_fail:
+ ip6mr_notifier_exit(net);
return err;
}
@@ -1392,6 +1342,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
ip6mr_rules_exit(net);
+ ip6mr_notifier_exit(net);
}
static struct pernet_operations ip6mr_net_ops = {
@@ -1452,14 +1403,14 @@ void ip6_mr_cleanup(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
@@ -1468,28 +1419,22 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
}
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
- }
-
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1504,31 +1449,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
@@ -1536,6 +1486,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
ip6mr_cache_resolve(net, mrt, uc, c);
ip6mr_cache_free(uc);
}
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1544,61 +1496,59 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL,
+ (struct mfc6_cache *)c,
+ mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
@@ -1616,7 +1566,7 @@ int ip6mr_sk_done(struct sock *sk)
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1624,9 +1574,13 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+ /* Note that mroute_sk had SOCK_RCU_FREE set,
+ * so the RCU grace period before sk freeing
+ * is guaranteed by sk_destruct()
+ */
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1644,9 +1598,9 @@ int ip6mr_sk_done(struct sock *sk)
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
@@ -1656,8 +1610,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
/*
* Socket options and virtual interface manipulation. The whole
@@ -1673,7 +1628,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1684,7 +1639,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1706,7 +1662,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
@@ -1741,7 +1698,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
@@ -1793,7 +1752,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
@@ -1824,7 +1783,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1872,10 +1831,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1888,8 +1847,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1906,19 +1865,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1946,10 +1905,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1962,8 +1921,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1980,19 +1939,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -2013,11 +1972,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
@@ -2087,46 +2046,50 @@ out_free:
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2135,52 +2098,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
@@ -2197,7 +2163,7 @@ int ip6_mr_input(struct sk_buff *skb)
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -2247,66 +2213,11 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -2367,15 +2278,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
@@ -2397,7 +2305,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2406,7 +2314,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2419,6 +2327,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2440,14 +2356,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2482,7 +2398,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2532,65 +2448,6 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9b9d2ff01b35..793159d77d8a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct inet6_dev *idev = NULL;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
@@ -2921,9 +2921,9 @@ static int __net_init igmp6_proc_init(struct net *net)
int err;
err = -ENOMEM;
- if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
+ if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
goto out;
- if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+ if (!proc_create("mcfilter6", 0444, net->proc_net,
&igmp6_mcf_seq_fops))
goto out_proc_net_igmp6;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f61a5b613b52..9de4dfb126ba 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
}
if (!dev->addr_len)
- inc_opt = 0;
+ inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_ADVERTISEMENT);
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
- dev, 1,
+ dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
@@ -1554,7 +1554,8 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
*(opt++) = (rd_len >> 3);
opt += 6;
- memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
+ skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
+ rd_len - 8);
}
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d95ceca7ff8f..531d6957af36 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -21,18 +21,19 @@
int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sock *sk = sk_to_full_sk(skb->sk);
unsigned int hh_len;
struct dst_entry *dst;
struct flowi6 fl6 = {
- .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+ .flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
.flowi6_mark = skb->mark,
- .flowi6_uid = sock_net_uid(net, skb->sk),
+ .flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
.saddr = iph->saddr,
};
int err;
- dst = ip6_route_output(net, skb->sk, &fl6);
+ dst = ip6_route_output(net, sk, &fl6);
err = dst->error;
if (err) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -50,7 +51,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
skb_dst_set(skb, NULL);
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
skb_dst_set(skb, dst);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index d395d1590699..ccbfa83e4bb0 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV6
if NF_TABLES
config NF_TABLES_IPV6
- tristate "IPv6 nf_tables support"
+ bool "IPv6 nf_tables support"
help
This option enables the IPv6 support for nf_tables.
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d984057b8395..44273d6f03a5 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -36,7 +36,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
-obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62358b93bbac..65c9e1a58305 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -420,11 +420,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
big jump. */
do {
@@ -725,16 +720,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -962,7 +950,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(AF_INET6, info->number);
+ ret = xt_compat_init_offsets(AF_INET6, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1075,7 +1065,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ip6t_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1105,6 +1095,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -1118,7 +1110,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1425,7 +1416,7 @@ translate_compat_table(struct net *net,
struct compat_ip6t_entry *iter0;
struct ip6t_replace repl;
unsigned int size;
- int ret = 0;
+ int ret;
info = *pinfo;
entry0 = *pentry0;
@@ -1434,7 +1425,9 @@ translate_compat_table(struct net *net,
j = 0;
xt_compat_lock(AF_INET6);
- xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 437af8c95277..cb6d42b03cb5 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -18,6 +18,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
static struct ipv6hdr *
synproxy_build_ip(struct net *net, struct sk_buff *skb,
@@ -405,6 +406,8 @@ static unsigned int ipv6_synproxy_hook(void *priv,
synproxy->isn = ntohl(th->ack_seq);
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
break;
case TCP_CONNTRACK_SYN_RECV:
if (!th->syn || !th->ack)
@@ -413,8 +416,10 @@ static unsigned int ipv6_synproxy_hook(void *priv,
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
- if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
opts.options &= ~(XT_SYNPROXY_OPT_MSS |
XT_SYNPROXY_OPT_WSCALE |
@@ -424,6 +429,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
synproxy_send_server_ack(net, state, skb, th, &opts);
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
swap(opts.tsval, opts.tsecr);
synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 94deb69bbbda..d12f511929f5 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -48,12 +48,8 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
}
fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- if ((flags & XT_RPFILTER_LOOSE) == 0) {
- fl6.flowi6_oif = dev->ifindex;
- lookup_flags |= RT6_LOOKUP_F_IFACE;
- }
- rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error)
goto out;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b84ce3e6d728..5e0332014c17 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -52,18 +52,9 @@
static const char nf_frags_cache_name[] = "nf-frags";
-struct nf_ct_frag6_skb_cb
-{
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
-
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
-static int zero;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -76,18 +67,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
@@ -152,23 +142,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
-}
-
-
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *nq;
-
- nq = container_of(q, struct frag_queue, q);
- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
static void nf_ct_frag6_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
@@ -178,34 +151,26 @@ static void nf_ct_frag6_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6_expire_frag_queue(net, fq, &nf_frags);
+ ip6_expire_frag_queue(net, fq);
}
/* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
- u32 user, struct in6_addr *src,
- struct in6_addr *dst, int iif, u8 ecn)
+static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = user,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = user;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
-
- local_bh_disable();
- hash = nf_hash_frag(id, src, dst);
-
- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
- local_bh_enable();
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+
+ q = inet_frag_find(&net->nf_frag.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -264,7 +229,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
return -EPROTO;
}
if (end > fq->q.len) {
@@ -295,13 +260,13 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = fq->q.fragments_tail;
- if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (NFCT_FRAG6_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -317,14 +282,19 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
+ (prev->ip_defrag_offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
- if (next && NFCT_FRAG6_CB(next)->offset < end)
+ if (next && next->ip_defrag_offset < end)
goto discard_fq;
- NFCT_FRAG6_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ if (skb->dev)
+ fq->iif = skb->dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -335,10 +305,6 @@ found:
else
fq->q.fragments = skb;
- if (skb->dev) {
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -357,7 +323,7 @@ found:
return 0;
discard_fq:
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
err:
return -EINVAL;
}
@@ -379,10 +345,10 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
int payload_len;
u8 ecn;
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
WARN_ON(head == NULL);
- WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -593,8 +559,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fhdr = (struct frag_hdr *)skb_transport_header(skb);
skb_orphan(skb);
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
return -ENOMEM;
@@ -622,25 +588,33 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
out_unlock:
spin_unlock_bh(&fq->q.lock);
- inet_frag_put(&fq->q, &nf_frags);
+ inet_frag_put(&fq->q);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
+ int res;
+
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->nf_frag.frags);
-
- return nf_ct_frag6_sysctl_register(net);
+ net->nf_frag.frags.f = &nf_frags;
+
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res < 0)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->nf_frag.frags);
+ return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ inet_frags_exit_net(&net->nf_frag.frags);
}
static struct pernet_operations nf_ct_net_ops = {
@@ -652,13 +626,12 @@ int nf_ct_frag6_init(void)
{
int ret = 0;
- nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
- nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
+ nf_frags.rhash_params = ip6_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index d346705d6ee6..207cb35569b1 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -178,7 +178,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if (skb->len <= mtu)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index bed57ee65f7b..6b7f075f811f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -99,6 +99,10 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
target, maniptype))
return false;
+
+ /* must reload, offset might have changed */
+ ipv6h = (void *)skb->data + iphdroff;
+
manip_addr:
if (maniptype == NF_NAT_MANIP_SRC)
ipv6h->saddr = target->src.u3.in6;
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index ebb2bf84232a..f14de4b6d639 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -116,9 +116,11 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
}
if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
- struct udphdr _hdr, *hp;
+ struct tcphdr _hdr;
+ struct udphdr *hp;
- hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+ hp = skb_header_pointer(skb, thoff, tproto == IPPROTO_UDP ?
+ sizeof(*hp) : sizeof(_hdr), &_hdr);
if (hp == NULL)
return NULL;
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
deleted file mode 100644
index 17e03589331c..000000000000
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int nft_do_chain_ipv6(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_ipv6 = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_FORWARD) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING),
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
- },
-};
-
-static int __init nf_tables_ipv6_init(void)
-{
- return nft_register_chain_type(&filter_ipv6);
-}
-
-static void __exit nf_tables_ipv6_exit(void)
-{
- nft_unregister_chain_type(&filter_ipv6);
-}
-
-module_init(nf_tables_ipv6_init);
-module_exit(nf_tables_ipv6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 73fe2bd13fcf..3557b114446c 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -65,7 +65,17 @@ static unsigned int nft_nat_ipv6_local_fn(void *priv,
return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
}
-static const struct nf_chain_type nft_chain_nat_ipv6 = {
+static int nft_nat_ipv6_init(struct nft_ctx *ctx)
+{
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv6_free(struct nft_ctx *ctx)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static const struct nft_chain_type nft_chain_nat_ipv6 = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_IPV6,
@@ -80,15 +90,13 @@ static const struct nf_chain_type nft_chain_nat_ipv6 = {
[NF_INET_LOCAL_OUT] = nft_nat_ipv6_local_fn,
[NF_INET_LOCAL_IN] = nft_nat_ipv6_fn,
},
+ .init = nft_nat_ipv6_init,
+ .free = nft_nat_ipv6_free,
};
static int __init nft_chain_nat_ipv6_init(void)
{
- int err;
-
- err = nft_register_chain_type(&nft_chain_nat_ipv6);
- if (err < 0)
- return err;
+ nft_register_chain_type(&nft_chain_nat_ipv6);
return 0;
}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 11d3c3b9aa18..da3f1f8cb325 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -60,7 +60,7 @@ static unsigned int nf_route_table_hook(void *priv,
return ret;
}
-static const struct nf_chain_type nft_chain_route_ipv6 = {
+static const struct nft_chain_type nft_chain_route_ipv6 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV6,
@@ -73,7 +73,9 @@ static const struct nf_chain_type nft_chain_route_ipv6 = {
static int __init nft_chain_route_init(void)
{
- return nft_register_chain_type(&nft_chain_route_ipv6);
+ nft_register_chain_type(&nft_chain_route_ipv6);
+
+ return 0;
}
static void __exit nft_chain_route_exit(void)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index cc5174c7254c..36be3cf0adef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -180,8 +180,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
*dest = 0;
- again:
- rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+ lookup_flags);
if (rt->dst.error)
goto put_rt_err;
@@ -189,15 +189,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev) {
- /* multipath route? Try again with F_IFACE */
- if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
- lookup_flags |= RT6_LOOKUP_F_IFACE;
- fl6.flowi6_oif = oif->ifindex;
- ip6_rt_put(rt);
- goto again;
- }
- }
+ if (oif && oif != rt->rt6i_idev->dev)
+ goto put_rt_err;
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index d12c55dad7d1..746eeae7f581 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -121,7 +121,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.tclass = np->tclass;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
- dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr);
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
if (IS_ERR(dst))
return PTR_ERR(dst);
rt = (struct rt6_info *) dst;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..a85f7e0b14b1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -38,7 +38,6 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
+ atomic_read(&net->ipv6.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv6.frags));
return 0;
}
@@ -290,7 +291,7 @@ int snmp6_register_dev(struct inet6_dev *idev)
if (!net->mib.proc_net_devsnmp6)
return -ENOENT;
- p = proc_create_data(idev->dev->name, S_IRUGO,
+ p = proc_create_data(idev->dev->name, 0444,
net->mib.proc_net_devsnmp6,
&snmp6_dev_seq_fops, idev);
if (!p)
@@ -314,11 +315,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
static int __net_init ipv6_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat6", 0444, net->proc_net,
&sockstat6_seq_fops))
return -ENOMEM;
- if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
+ if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
goto proc_snmp6_fail;
net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
@@ -354,4 +355,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c25339b1984..5eb9b08947ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1318,7 +1318,7 @@ static const struct file_operations raw6_seq_fops = {
static int __net_init raw6_init_net(struct net *net)
{
- if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
+ if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index afbc000ad4f2..4979610287e2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -62,13 +62,6 @@
static const char ip6_frag_cache_name[] = "ip6-frags";
-struct ip6frag_skb_cb {
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb))
-
static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -79,94 +72,58 @@ static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
-/*
- * callers should be careful not to use the hash value outside the ipfrag_lock
- * as doing so could race with ipfrag_hash_rnd being recalculated.
- */
-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-}
-
-static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *fq;
-
- fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
-}
-
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct frag_queue *fq;
- const struct ip6_create_arg *arg = a;
-
- fq = container_of(q, struct frag_queue, q);
- return fq->id == arg->id &&
- fq->user == arg->user &&
- ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst) &&
- (arg->iif == fq->iif ||
- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
- IPV6_ADDR_LINKLOCAL)));
-}
-EXPORT_SYMBOL(ip6_frag_match);
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- const struct ip6_create_arg *arg = a;
+ const struct frag_v6_compare_key *key = a;
- fq->id = arg->id;
- fq->user = arg->user;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
- fq->ecn = arg->ecn;
+ q->key.v6 = *key;
+ fq->ecn = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
+ struct sk_buff *head;
+ rcu_read_lock();
spin_lock(&fq->q.lock);
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, frags);
+ inet_frag_kill(&fq->q);
- rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
- goto out_rcu_unlock;
+ goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-
- if (inet_frag_evicting(&fq->q))
- goto out_rcu_unlock;
-
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
- goto out_rcu_unlock;
+ head = fq->q.fragments;
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
/* But use as source device on which LAST ARRIVED
* segment was received. And do not use fq->dev
* pointer directly, device might already disappeared.
*/
- fq->q.fragments->dev = dev;
- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ head->dev = dev;
+ skb_get(head);
+ spin_unlock(&fq->q.lock);
+
+ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
+
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, frags);
+out_rcu_unlock:
+ rcu_read_unlock();
+ inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
@@ -179,31 +136,29 @@ static void ip6_frag_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
- ip6_expire_frag_queue(net, fq, &ip6_frags);
+ ip6_expire_frag_queue(net, fq);
}
static struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, int iif, u8 ecn)
+fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = IP6_DEFRAG_LOCAL_DELIVER,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = IP6_DEFRAG_LOCAL_DELIVER;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
- hash = inet6_hash_frag(id, src, dst);
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;
- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv6.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -288,13 +243,13 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = fq->q.fragments_tail;
- if (!prev || FRAG6_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (FRAG6_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -309,14 +264,20 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (FRAG6_CB(prev)->offset + prev->len) > offset)
+ (prev->ip_defrag_offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
- if (next && FRAG6_CB(next)->offset < end)
+ if (next && next->ip_defrag_offset < end)
goto discard_fq;
- FRAG6_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+ if (dev)
+ fq->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -327,11 +288,6 @@ found:
else
fq->q.fragments = skb;
- dev = skb->dev;
- if (dev) {
- fq->iif = dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -364,7 +320,7 @@ found:
return -1;
discard_fq:
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
@@ -391,7 +347,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
int sum_truesize;
u8 ecn;
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -418,7 +374,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
WARN_ON(head == NULL);
- WARN_ON(FRAG6_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
@@ -531,6 +487,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
+ int iif;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -559,17 +516,18 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ iif = skb->dev ? skb->dev->ifindex : 0;
+ fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
int ret;
spin_lock(&fq->q.lock);
+ fq->iif = iif;
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &ip6_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -590,24 +548,22 @@ static const struct inet6_protocol frag_protocol = {
};
#ifdef CONFIG_SYSCTL
-static int zero;
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv6.frags.high_thresh
},
{
@@ -650,10 +606,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[1].data = &net->ipv6.frags.low_thresh;
table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
}
hdr = register_net_sysctl(net, "net/ipv6", table);
@@ -715,19 +667,27 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
+ int res;
+
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->ipv6.frags.f = &ip6_frags;
- inet_frags_init_net(&net->ipv6.frags);
+ res = inet_frags_init_net(&net->ipv6.frags);
+ if (res < 0)
+ return res;
- return ip6_frags_ns_sysctl_register(net);
+ res = ip6_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv6.frags);
+ return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
ip6_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+ inet_frags_exit_net(&net->ipv6.frags);
}
static struct pernet_operations ip6_frags_ops = {
@@ -735,14 +695,55 @@ static struct pernet_operations ip6_frags_ops = {
.exit = ipv6_frags_exit_net,
};
+static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v6,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v6_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+const struct rhashtable_params ip6_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = ip6_key_hashfn,
+ .obj_hashfn = ip6_obj_hashfn,
+ .obj_cmpfn = ip6_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL(ip6_rhash_params);
+
int __init ipv6_frag_init(void)
{
int ret;
- ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ ip6_frags.constructor = ip6_frag_init;
+ ip6_frags.destructor = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+ ip6_frags.frag_expire = ip6_frag_expire;
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ip6_frags.rhash_params = ip6_rhash_params;
+ ret = inet_frags_init(&ip6_frags);
if (ret)
goto out;
+ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ if (ret)
+ goto err_protocol;
+
ret = ip6_frags_sysctl_register();
if (ret)
goto err_sysctl;
@@ -751,16 +752,6 @@ int __init ipv6_frag_init(void)
if (ret)
goto err_pernet;
- ip6_frags.hashfn = ip6_hashfn;
- ip6_frags.constructor = ip6_frag_init;
- ip6_frags.destructor = NULL;
- ip6_frags.qsize = sizeof(struct frag_queue);
- ip6_frags.match = ip6_frag_match;
- ip6_frags.frag_expire = ip6_frag_expire;
- ip6_frags.frags_cache_name = ip6_frag_cache_name;
- ret = inet_frags_init(&ip6_frags);
- if (ret)
- goto err_pernet;
out:
return ret;
@@ -768,6 +759,8 @@ err_pernet:
ip6_frags_sysctl_unregister();
err_sysctl:
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+err_protocol:
+ inet_frags_fini(&ip6_frags);
goto out;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9dcfadddd800..49b954d6d0fa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -128,7 +128,7 @@ struct uncached_list {
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
-static void rt6_uncached_list_add(struct rt6_info *rt)
+void rt6_uncached_list_add(struct rt6_info *rt)
{
struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
@@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt)
spin_unlock_bh(&ul->lock);
}
-static void rt6_uncached_list_del(struct rt6_info *rt)
+void rt6_uncached_list_del(struct rt6_info *rt)
{
if (!list_empty(&rt->rt6i_uncached)) {
struct uncached_list *ul = rt->rt6i_uncached_list;
@@ -450,8 +450,10 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+ struct rt6_info *match,
struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
int strict)
{
struct rt6_info *sibling, *next_sibling;
@@ -460,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+ fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
@@ -914,11 +916,16 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
+ if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+ flags &= ~RT6_LOOKUP_F_IFACE;
+
rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
@@ -929,8 +936,8 @@ restart:
rt = rt6_device_match(net, rt, &fl6->saddr,
fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6,
- fl6->flowi6_oif, flags);
+ rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+ skb, flags);
}
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
@@ -954,14 +961,15 @@ restart:
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags)
+ const struct sk_buff *skb, int flags)
{
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int strict)
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int strict)
{
struct flowi6 fl6 = {
.flowi6_oif = oif,
@@ -975,7 +983,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
@@ -1509,7 +1517,30 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
}
}
-static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
+ struct rt6_info *rt, int mtu)
+{
+ /* If the new MTU is lower than the route PMTU, this new MTU will be the
+ * lowest MTU in the path: always allow updating the route PMTU to
+ * reflect PMTU decreases.
+ *
+ * If the new MTU is higher, and the route PMTU is equal to the local
+ * MTU, this means the old MTU is the lowest in the path, so allow
+ * updating it: if other nodes now have lower MTUs, PMTU discovery will
+ * handle this.
+ */
+
+ if (dst_mtu(&rt->dst) >= mtu)
+ return true;
+
+ if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
+ return true;
+
+ return false;
+}
+
+static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
+ struct rt6_info *rt, int mtu)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
@@ -1518,20 +1549,22 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
- if (bucket) {
- for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
- struct rt6_info *entry = rt6_ex->rt6i;
- /* For RTF_CACHE with rt6i_pmtu == 0
- * (i.e. a redirected route),
- * the metrics of its rt->dst.from has already
- * been updated.
- */
- if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
- entry->rt6i_pmtu = mtu;
- }
- bucket++;
+ if (!bucket)
+ return;
+
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+
+ /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
+ * route), the metrics of its rt->dst.from have already
+ * been updated.
+ */
+ if (entry->rt6i_pmtu &&
+ rt6_mtu_change_route_allowed(idev, entry, mtu))
+ entry->rt6i_pmtu = mtu;
}
+ bucket++;
}
}
@@ -1601,11 +1634,10 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
struct neighbour *neigh;
__u8 neigh_flags = 0;
- neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
- if (neigh) {
+ neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+ if (neigh)
neigh_flags = neigh->flags;
- neigh_release(neigh);
- }
+
if (!(neigh_flags & NTF_ROUTER)) {
RT6_TRACE("purging route %p via non-router but gateway\n",
rt);
@@ -1629,7 +1661,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
if (!rcu_access_pointer(rt->rt6i_exception_bucket))
return;
- spin_lock_bh(&rt6_exception_lock);
+ rcu_read_lock_bh();
+ spin_lock(&rt6_exception_lock);
bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
@@ -1643,11 +1676,13 @@ void rt6_age_exceptions(struct rt6_info *rt,
bucket++;
}
}
- spin_unlock_bh(&rt6_exception_lock);
+ spin_unlock(&rt6_exception_lock);
+ rcu_read_unlock_bh();
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *rt_cache;
@@ -1669,7 +1704,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
redo_rt6_select:
rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(rt, fl6, oif, strict);
+ rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
@@ -1768,28 +1803,35 @@ uncached_rt_out:
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
- struct flow_keys *keys)
+ struct flow_keys *keys,
+ struct flow_keys *flkeys)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
const struct ipv6hdr *key_iph = outer_iph;
+ struct flow_keys *_flkeys = flkeys;
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
@@ -1811,26 +1853,76 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
goto out;
key_iph = inner_iph;
+ _flkeys = NULL;
out:
- memset(keys, 0, sizeof(*keys));
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys->addrs.v6addrs.src = key_iph->saddr;
- keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
- keys->basic.ip_proto = key_iph->nexthdr;
+ if (_flkeys) {
+ keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+ keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+ keys->tags.flow_label = _flkeys->tags.flow_label;
+ keys->basic.ip_proto = _flkeys->basic.ip_proto;
+ } else {
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+ }
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
+ u32 mhash;
- if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys) >> 1;
+ switch (ip6_multipath_hash_policy(net)) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+ } else {
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
+ case 1:
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+ hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.ports.src = fl6->fl6_sport;
+ hash_keys.ports.dst = fl6->fl6_dport;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
}
+ mhash = flow_hash_from_keys(&hash_keys);
- return get_hash_from_flowi6(fl6) >> 1;
+ return mhash >> 1;
}
void ip6_route_input(struct sk_buff *skb)
@@ -1847,20 +1939,29 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
+ struct flow_keys *flkeys = NULL, _flkeys;
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+ if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+ flkeys = &_flkeys;
+
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb);
- skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+ skb_dst_set(skb,
+ ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
@@ -1888,7 +1989,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
@@ -2128,6 +2229,23 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
+ const struct flowi6 *fl6)
+{
+#ifdef CONFIG_IPV6_SUBTREES
+ struct ipv6_pinfo *np = inet6_sk(sk);
+#endif
+
+ ip6_dst_store(sk, dst,
+ ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
+ &sk->sk_v6_daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+ ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
+ &np->saddr :
+#endif
+ NULL);
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
@@ -2137,6 +2255,7 @@ struct ip6rd_flowi {
static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
@@ -2210,8 +2329,9 @@ out:
};
static struct dst_entry *ip6_route_redirect(struct net *net,
- const struct flowi6 *fl6,
- const struct in6_addr *gateway)
+ const struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ const struct in6_addr *gateway)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip6rd_flowi rdfl;
@@ -2219,7 +2339,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
rdfl.fl6 = *fl6;
rdfl.gateway = *gateway;
- return fib6_rule_lookup(net, &rdfl.fl6,
+ return fib6_rule_lookup(net, &rdfl.fl6, skb,
flags, __ip6_route_redirect);
}
@@ -2239,7 +2359,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
fl6.flowlabel = ip6_flowinfo(iph);
fl6.flowi6_uid = uid;
- dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2261,7 +2381,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
fl6.saddr = iph->daddr;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2463,7 +2583,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
flags |= RT6_LOOKUP_F_HAS_SADDR;
flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
- rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
/* if table lookup failed, fall back to full lookup */
if (rt == net->ipv6.ip6_null_entry) {
@@ -2476,7 +2596,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
static int ip6_route_check_nh_onlink(struct net *net,
struct fib6_config *cfg,
- struct net_device *dev,
+ const struct net_device *dev,
struct netlink_ext_ack *extack)
{
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
@@ -2526,7 +2646,7 @@ static int ip6_route_check_nh(struct net *net,
}
if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
if (!grt)
goto out;
@@ -2552,6 +2672,79 @@ out:
return err;
}
+static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
+ struct net_device **_dev, struct inet6_dev **idev,
+ struct netlink_ext_ack *extack)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ int gwa_type = ipv6_addr_type(gw_addr);
+ bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
+ const struct net_device *dev = *_dev;
+ bool need_addr_check = !dev;
+ int err = -EINVAL;
+
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ if (dev &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
+ /* IPv6 strictly inhibits using not link-local
+ * addresses as nexthop address.
+ * Otherwise, router will not able to send redirects.
+ * It is very good, but in some (rare!) circumstances
+ * (SIT, PtP, NBMA NOARP links) it is handy to allow
+ * some exceptions. --ANK
+ * We allow IPv4-mapped nexthops to support RFC4798-type
+ * addressing
+ */
+ if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ goto out;
+ }
+
+ if (cfg->fc_flags & RTNH_F_ONLINK)
+ err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
+ else
+ err = ip6_route_check_nh(net, cfg, _dev, idev);
+
+ if (err)
+ goto out;
+ }
+
+ /* reload in case device was changed */
+ dev = *_dev;
+
+ err = -EINVAL;
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
+ goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
+ goto out;
+ }
+
+ /* if we did not check gw_addr above, do so now that the
+ * egress device has been resolved.
+ */
+ if (need_addr_check &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2671,14 +2864,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (err)
goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate);
- if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_output = rt->dst.output;
- rt->dst.output = lwtunnel_output;
- }
- if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_input = rt->dst.input;
- rt->dst.input = lwtunnel_input;
- }
+ lwtunnel_set_redirect(&rt->dst);
}
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
@@ -2741,67 +2927,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
}
if (cfg->fc_flags & RTF_GATEWAY) {
- const struct in6_addr *gw_addr;
- int gwa_type;
-
- gw_addr = &cfg->fc_gateway;
- gwa_type = ipv6_addr_type(gw_addr);
-
- /* if gw_addr is local we will fail to detect this in case
- * address is still TENTATIVE (DAD in progress). rt6_lookup()
- * will return already-added prefix route via interface that
- * prefix route was assigned to, which might be non-loopback.
- */
- err = -EINVAL;
- if (ipv6_chk_addr_and_flags(net, gw_addr,
- gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0)) {
- NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ if (err)
goto out;
- }
- rt->rt6i_gateway = *gw_addr;
-
- if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- /* IPv6 strictly inhibits using not link-local
- addresses as nexthop address.
- Otherwise, router will not able to send redirects.
- It is very good, but in some (rare!) circumstances
- (SIT, PtP, NBMA NOARP links) it is handy to allow
- some exceptions. --ANK
- We allow IPv4-mapped nexthops to support RFC4798-type
- addressing
- */
- if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED))) {
- NL_SET_ERR_MSG(extack,
- "Invalid gateway address");
- goto out;
- }
- if (cfg->fc_flags & RTNH_F_ONLINK) {
- err = ip6_route_check_nh_onlink(net, cfg, dev,
- extack);
- } else {
- err = ip6_route_check_nh(net, cfg, &dev, &idev);
- }
- if (err)
- goto out;
- }
- err = -EINVAL;
- if (!dev) {
- NL_SET_ERR_MSG(extack, "Egress device not specified");
- goto out;
- } else if (dev->flags & IFF_LOOPBACK) {
- NL_SET_ERR_MSG(extack,
- "Egress device can not be loopback device for this route");
- goto out;
- }
+ rt->rt6i_gateway = cfg->fc_gateway;
}
err = -ENODEV;
if (!dev)
goto out;
+ if (idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
+ err = -EACCES;
+ goto out;
+ }
+
if (!(dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
err = -ENETDOWN;
@@ -3809,25 +3951,13 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
Since RFC 1981 doesn't include administrative MTU increase
update PMTU increase is a MUST. (i.e. jumbo frame)
*/
- /*
- If new MTU is less than route PMTU, this new MTU will be the
- lowest MTU in the path, update the route PMTU to reflect PMTU
- decreases; if new MTU is greater than route PMTU, and the
- old MTU is the lowest MTU in the path, update the route PMTU
- to reflect the increase. In this case if the other nodes' MTU
- also have the lowest MTU, TOO BIG MESSAGE will be lead to
- PMTU discovery.
- */
if (rt->dst.dev == arg->dev &&
- dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
spin_lock_bh(&rt6_exception_lock);
- if (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
+ rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
- }
- rt6_exceptions_update_pmtu(rt, arg->mtu);
+ rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
spin_unlock_bh(&rt6_exception_lock);
}
return 0;
@@ -4099,6 +4229,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
r_cfg.fc_encap_type = nla_get_u16(nla);
}
+ r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
rt = ip6_route_info_create(&r_cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -4598,7 +4729,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
rcu_read_unlock();
} else {
@@ -4963,7 +5094,7 @@ static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
- proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
+ proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
#endif
return 0;
}
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index bd6cc688bd19..f343e6f0fc95 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -16,6 +16,7 @@
#include <linux/net.h>
#include <linux/module.h>
#include <net/ip.h>
+#include <net/ip_tunnels.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
@@ -93,7 +94,8 @@ static void set_tun_src(struct net *net, struct net_device *dev,
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(dst->dev);
struct ipv6hdr *hdr, *inner_hdr;
struct ipv6_sr_hdr *isrh;
int hdrlen, tot_len, err;
@@ -134,7 +136,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
- set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
@@ -210,11 +212,6 @@ static int seg6_do_srh(struct sk_buff *skb)
tinfo = seg6_encap_lwtunnel(dst->lwtstate);
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
-
switch (tinfo->mode) {
case SEG6_IPTUN_MODE_INLINE:
if (skb->protocol != htons(ETH_P_IPV6))
@@ -223,10 +220,12 @@ static int seg6_do_srh(struct sk_buff *skb)
err = seg6_do_srh_inline(skb, tinfo->srh);
if (err)
return err;
-
- skb_reset_inner_headers(skb);
break;
case SEG6_IPTUN_MODE_ENCAP:
+ err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
+ if (err)
+ return err;
+
if (skb->protocol == htons(ETH_P_IPV6))
proto = IPPROTO_IPV6;
else if (skb->protocol == htons(ETH_P_IP))
@@ -238,6 +237,8 @@ static int seg6_do_srh(struct sk_buff *skb)
if (err)
return err;
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ skb_set_inner_protocol(skb, skb->protocol);
skb->protocol = htons(ETH_P_IPV6);
break;
case SEG6_IPTUN_MODE_L2ENCAP:
@@ -261,8 +262,6 @@ static int seg6_do_srh(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
- skb_set_inner_protocol(skb, skb->protocol);
-
return 0;
}
@@ -418,7 +417,7 @@ static int seg6_build_state(struct nlattr *nla,
slwt = seg6_lwt_lwtunnel(newts);
- err = dst_cache_init(&slwt->cache, GFP_KERNEL);
+ err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
if (err) {
kfree(newts);
return err;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index ba3767ef5e93..45722327375a 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
if (!tbl_id) {
- dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
} else {
struct fib6_table *table;
@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
if (!table)
goto out;
- rt = ip6_pol_route(net, table, 0, &fl6, flags);
+ rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
dst = &rt->dst;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3a1775a62973..2afce37a7177 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel *t = netdev_priv(dev);
- if (dev == sitn->fb_tunnel_dev) {
+ if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
t->ip6rd.relay_prefix = 0;
t->ip6rd.prefixlen = 16;
@@ -250,11 +250,13 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
if (!create)
goto failed;
- if (parms->name[0])
+ if (parms->name[0]) {
+ if (!dev_valid_name(parms->name))
+ goto failed;
strlcpy(name, parms->name, IFNAMSIZ);
- else
+ } else {
strcpy(name, "sit%d");
-
+ }
dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
if (!dev)
@@ -1578,6 +1580,13 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
return err;
+ if (tb[IFLA_MTU]) {
+ u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+
+ if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len)
+ dev->mtu = mtu;
+ }
+
#ifdef CONFIG_IPV6_SIT_6RD
if (ipip6_netlink_6rd_parms(data, &ip6rd))
err = ipip6_tunnel_update_6rd(nt, &ip6rd);
@@ -1828,6 +1837,9 @@ static int __net_init sit_init_net(struct net *net)
sitn->tunnels[2] = sitn->tunnels_r;
sitn->tunnels[3] = sitn->tunnels_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
+
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index e7a3a6b6cf56..e997141aed8c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -217,6 +217,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
treq->snt_isn = cookie;
treq->ts_off = 0;
treq->txhash = net_tx_rndhash();
+ if (IS_ENABLED(CONFIG_SMC))
+ ireq->smc_ok = 0;
/*
* We need to lookup the dst_entry to get the correct window size.
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index a789a8ac6a64..6fbdef630152 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,14 +16,31 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#include <net/netevent.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+static int zero;
static int one = 1;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_policy);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
static struct ctl_table ipv6_table_template[] = {
{
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv6.sysctl.multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_rt6_multipath_hash_policy,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+ ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 412139f4eccd..6d664d83cd16 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
ipv6_hdr(skb)->saddr.s6_addr32);
}
+static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from tcp_v6_connect() and intended to
+ * prevent BPF program called below from accessing bytes that are out
+ * of the bound specified by user in addr_len.
+ */
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ sock_owned_by_me(sk);
+
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
+}
+
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
@@ -1451,6 +1466,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1470,10 +1486,20 @@ process:
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v6_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
@@ -1914,6 +1940,7 @@ struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
.close = tcp_close,
+ .pre_connect = tcp_v6_pre_connect,
.connect = tcp_v6_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52e3ea0e6f50..4ec76a87aeb8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
}
}
+static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* The following checks are replicated from __ip6_datagram_connect()
+ * and intended to prevent BPF program called below from accessing
+ * bytes that are out of the bound specified by user in addr_len.
+ */
+ if (uaddr->sa_family == AF_INET) {
+ if (__ipv6_only_sock(sk))
+ return -EAFNOSUPPORT;
+ return udp_pre_connect(sk, uaddr, addr_len);
+ }
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+}
+
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
@@ -1097,10 +1116,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
+ bool connected = false;
int ulen = len;
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int err;
- int connected = 0;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sockcm_cookie sockc;
@@ -1222,7 +1241,7 @@ do_udp_sendmsg:
fl6.fl6_dport = inet->inet_dport;
daddr = &sk->sk_v6_daddr;
fl6.flowlabel = np->flow_label;
- connected = 1;
+ connected = true;
}
if (!fl6.flowi6_oif)
@@ -1252,7 +1271,7 @@ do_udp_sendmsg:
}
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
- connected = 0;
+ connected = false;
}
if (!opt) {
opt = txopt_get(np);
@@ -1274,11 +1293,11 @@ do_udp_sendmsg:
final_p = fl6_update_dst(&fl6, opt, &final);
if (final_p)
- connected = 0;
+ connected = false;
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
fl6.flowi6_oif = np->mcast_oif;
- connected = 0;
+ connected = false;
} else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
@@ -1289,7 +1308,7 @@ do_udp_sendmsg:
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
- dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
dst = NULL;
@@ -1314,7 +1333,7 @@ back_from_confirm:
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
- goto release_dst;
+ goto out;
}
lock_sock(sk);
@@ -1348,23 +1367,6 @@ do_append_data:
err = np->recverr ? net_xmit_errno(err) : 0;
release_sock(sk);
-release_dst:
- if (dst) {
- if (connected) {
- ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
- &sk->sk_v6_daddr : NULL,
-#ifdef CONFIG_IPV6_SUBTREES
- ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
- &np->saddr :
-#endif
- NULL);
- } else {
- dst_release(dst);
- }
- dst = NULL;
- }
-
out:
dst_release(dst);
fl6_sock_release(flowlabel);
@@ -1509,34 +1511,35 @@ void udp6_proc_exit(struct net *net)
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
- .name = "UDPv6",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip6_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udpv6_destroy_sock,
- .setsockopt = udpv6_setsockopt,
- .getsockopt = udpv6_getsockopt,
- .sendmsg = udpv6_sendmsg,
- .recvmsg = udpv6_recvmsg,
- .release_cb = ip6_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v6_rehash,
- .get_port = udp_v6_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp6_sock),
- .h.udp_table = &udp_table,
+ .name = "UDPv6",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .pre_connect = udpv6_pre_connect,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+ .sendmsg = udpv6_sendmsg,
+ .recvmsg = udpv6_recvmsg,
+ .release_cb = ip6_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v6_rehash,
+ .get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp6_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udpv6_setsockopt,
- .compat_getsockopt = compat_udpv6_getsockopt,
+ .compat_setsockopt = compat_udpv6_setsockopt,
+ .compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index bb935a3b7fea..de1b0b8c53b0 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -92,7 +92,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
- eth_hdr(skb)->h_proto = skb->protocol;
+ if (skb->mac_len)
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8ae87d4ec5ff..5959ce9620eb 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -82,7 +82,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if ((!skb_is_gso(skb) && skb->len > mtu) ||
(skb_is_gso(skb) &&
- skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) {
+ !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
skb->dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 09fb44ee3b45..416fe67271a9 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
xdst->u.rt6.rt6i_src = rt->rt6i_src;
+ INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
+ rt6_uncached_list_add(&xdst->u.rt6);
+ atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
return 0;
}
@@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
+ if (xdst->u.rt6.rt6i_uncached_list)
+ rt6_uncached_list_del(&xdst->u.rt6);
xfrm_dst_destroy(xdst);
}
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b15075a5c227..16f434791763 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
{
xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
}
-
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 1e8cc7bcbca3..893a022f9620 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -989,14 +989,13 @@ done:
}
static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
addr->sa_family = AF_IUCV;
- *len = sizeof(struct sockaddr_iucv);
if (peer) {
memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
@@ -1009,7 +1008,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
- return 0;
+ return sizeof(struct sockaddr_iucv);
}
/**
@@ -2433,9 +2432,11 @@ static int afiucv_iucv_init(void)
af_iucv_dev->driver = &af_iucv_driver;
err = device_register(af_iucv_dev);
if (err)
- goto out_driver;
+ goto out_iucv_dev;
return 0;
+out_iucv_dev:
+ put_device(af_iucv_dev);
out_driver:
driver_unregister(&af_iucv_driver);
out_iucv:
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 9d5649e4e8b7..1fac92543094 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -269,7 +269,7 @@ static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo)
struct proc_dir_entry *p;
int rc = 0;
- p = proc_create_data(muxinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(muxinfo->name, 0444, net->proc_net,
muxinfo->seq_fops, muxinfo);
if (!p)
rc = -ENOMEM;
@@ -406,7 +406,7 @@ static int kcm_proc_init_net(struct net *net)
{
int err;
- if (!proc_create("kcm_stats", S_IRUGO, net->proc_net,
+ if (!proc_create("kcm_stats", 0444, net->proc_net,
&kcm_stats_seq_fops)) {
err = -ENOMEM;
goto out_kcm_stats;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index f297d53a11aa..dc76bc346829 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1381,24 +1381,32 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
.parse_msg = kcm_parse_func_strparser,
.read_sock_done = kcm_read_sock_done,
};
- int err;
+ int err = 0;
csk = csock->sk;
if (!csk)
return -EINVAL;
+ lock_sock(csk);
+
/* Only allow TCP sockets to be attached for now */
if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
- csk->sk_protocol != IPPROTO_TCP)
- return -EOPNOTSUPP;
+ csk->sk_protocol != IPPROTO_TCP) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
/* Don't allow listeners or closed sockets */
- if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
- return -EOPNOTSUPP;
+ if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
- if (!psock)
- return -ENOMEM;
+ if (!psock) {
+ err = -ENOMEM;
+ goto out;
+ }
psock->mux = mux;
psock->sk = csk;
@@ -1407,7 +1415,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
err = strp_init(&psock->strp, csk, &cb);
if (err) {
kmem_cache_free(kcm_psockp, psock);
- return err;
+ goto out;
}
write_lock_bh(&csk->sk_callback_lock);
@@ -1417,9 +1425,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
*/
if (csk->sk_user_data) {
write_unlock_bh(&csk->sk_callback_lock);
+ strp_stop(&psock->strp);
strp_done(&psock->strp);
kmem_cache_free(kcm_psockp, psock);
- return -EALREADY;
+ err = -EALREADY;
+ goto out;
}
psock->save_data_ready = csk->sk_data_ready;
@@ -1455,7 +1465,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
/* Schedule RX work in case there are already bytes queued */
strp_check_rcv(&psock->strp);
- return 0;
+out:
+ release_sock(csk);
+
+ return err;
}
static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
@@ -1507,6 +1520,7 @@ static void kcm_unattach(struct kcm_psock *psock)
if (WARN_ON(psock->rx_kcm)) {
write_unlock_bh(&csk->sk_callback_lock);
+ release_sock(csk);
return;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 194a7483bb93..0fbd3ee26165 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -111,6 +111,13 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
+#if IS_ENABLED(CONFIG_IPV6)
+static bool l2tp_sk_is_v6(struct sock *sk)
+{
+ return sk->sk_family == PF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr);
+}
+#endif
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
{
@@ -136,51 +143,6 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
}
-/* Lookup the tunnel socket, possibly involving the fs code if the socket is
- * owned by userspace. A struct sock returned from this function must be
- * released using l2tp_tunnel_sock_put once you're done with it.
- */
-static struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel)
-{
- int err = 0;
- struct socket *sock = NULL;
- struct sock *sk = NULL;
-
- if (!tunnel)
- goto out;
-
- if (tunnel->fd >= 0) {
- /* Socket is owned by userspace, who might be in the process
- * of closing it. Look the socket up using the fd to ensure
- * consistency.
- */
- sock = sockfd_lookup(tunnel->fd, &err);
- if (sock)
- sk = sock->sk;
- } else {
- /* Socket is owned by kernelspace */
- sk = tunnel->sock;
- sock_hold(sk);
- }
-
-out:
- return sk;
-}
-
-/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */
-static void l2tp_tunnel_sock_put(struct sock *sk)
-{
- struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
- if (tunnel) {
- if (tunnel->fd >= 0) {
- /* Socket is owned by userspace */
- sockfd_put(sk->sk_socket);
- }
- sock_put(sk);
- }
- sock_put(sk);
-}
-
/* Session hash list.
* The session_id SHOULD be random according to RFC2661, but several
* L2TP implementations (Cisco and Microsoft) use incrementing
@@ -193,6 +155,13 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
}
+void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
+{
+ sock_put(tunnel->sock);
+ /* the tunnel is freed in the socket destructor */
+}
+EXPORT_SYMBOL(l2tp_tunnel_free);
+
/* Lookup a tunnel. A new reference is held on the returned tunnel. */
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
{
@@ -345,13 +314,11 @@ int l2tp_session_register(struct l2tp_session *session,
}
l2tp_tunnel_inc_refcount(tunnel);
- sock_hold(tunnel->sock);
hlist_add_head_rcu(&session->global_hlist, g_head);
spin_unlock_bh(&pn->l2tp_session_hlist_lock);
} else {
l2tp_tunnel_inc_refcount(tunnel);
- sock_hold(tunnel->sock);
}
hlist_add_head(&session->hlist, head);
@@ -368,26 +335,6 @@ err_tlock:
}
EXPORT_SYMBOL_GPL(l2tp_session_register);
-/* Lookup a tunnel by id
- */
-struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id)
-{
- struct l2tp_tunnel *tunnel;
- struct l2tp_net *pn = l2tp_pernet(net);
-
- rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (tunnel->tunnel_id == tunnel_id) {
- rcu_read_unlock_bh();
- return tunnel;
- }
- }
- rcu_read_unlock_bh();
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
-
struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth)
{
struct l2tp_net *pn = l2tp_pernet(net);
@@ -969,7 +916,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_tunnel *tunnel;
- tunnel = l2tp_sock_to_tunnel(sk);
+ tunnel = l2tp_tunnel(sk);
if (tunnel == NULL)
goto pass_up;
@@ -977,13 +924,10 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
tunnel->name, skb->len);
if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook))
- goto pass_up_put;
+ goto pass_up;
- sock_put(sk);
return 0;
-pass_up_put:
- sock_put(sk);
pass_up:
return 1;
}
@@ -1092,7 +1036,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->ignore_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
+ if (l2tp_sk_is_v6(tunnel->sock))
error = inet6_csk_xmit(tunnel->sock, skb, NULL);
else
#endif
@@ -1155,6 +1099,15 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
goto out_unlock;
}
+ /* The user-space may change the connection status for the user-space
+ * provided socket at run time: we must check it under the socket lock
+ */
+ if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) {
+ kfree_skb(skb);
+ ret = NET_XMIT_DROP;
+ goto out_unlock;
+ }
+
/* Get routing info from the tunnel socket */
skb_dst_drop(skb);
skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
@@ -1174,7 +1127,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Calculate UDP checksum if configured to do so */
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
+ if (l2tp_sk_is_v6(sk))
udp6_set_csum(udp_get_no_check6_tx(sk),
skb, &inet6_sk(sk)->saddr,
&sk->sk_v6_daddr, udp_len);
@@ -1207,14 +1160,12 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
static void l2tp_tunnel_destruct(struct sock *sk)
{
struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
- struct l2tp_net *pn;
if (tunnel == NULL)
goto end;
l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name);
-
/* Disable udp encapsulation */
switch (tunnel->encap) {
case L2TP_ENCAPTYPE_UDP:
@@ -1231,18 +1182,11 @@ static void l2tp_tunnel_destruct(struct sock *sk)
sk->sk_destruct = tunnel->old_sk_destruct;
sk->sk_user_data = NULL;
- /* Remove the tunnel struct from the tunnel list */
- pn = l2tp_pernet(tunnel->l2tp_net);
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_del_rcu(&tunnel->list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
- tunnel->sock = NULL;
- l2tp_tunnel_dec_refcount(tunnel);
-
/* Call the original destructor */
if (sk->sk_destruct)
(*sk->sk_destruct)(sk);
+
+ kfree_rcu(tunnel, rcu);
end:
return;
}
@@ -1303,49 +1247,43 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
/* Tunnel socket destroy hook for UDP encapsulation */
static void l2tp_udp_encap_destroy(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
- if (tunnel) {
- l2tp_tunnel_closeall(tunnel);
- sock_put(sk);
- }
+ struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
+
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
}
/* Workqueue tunnel deletion function */
static void l2tp_tunnel_del_work(struct work_struct *work)
{
- struct l2tp_tunnel *tunnel = NULL;
- struct socket *sock = NULL;
- struct sock *sk = NULL;
-
- tunnel = container_of(work, struct l2tp_tunnel, del_work);
+ struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel,
+ del_work);
+ struct sock *sk = tunnel->sock;
+ struct socket *sock = sk->sk_socket;
+ struct l2tp_net *pn;
l2tp_tunnel_closeall(tunnel);
- sk = l2tp_tunnel_sock_lookup(tunnel);
- if (!sk)
- goto out;
-
- sock = sk->sk_socket;
-
- /* If the tunnel socket was created by userspace, then go through the
- * inet layer to shut the socket down, and let userspace close it.
- * Otherwise, if we created the socket directly within the kernel, use
+ /* If the tunnel socket was created within the kernel, use
* the sk API to release it here.
- * In either case the tunnel resources are freed in the socket
- * destructor when the tunnel socket goes away.
*/
- if (tunnel->fd >= 0) {
- if (sock)
- inet_shutdown(sock, 2);
- } else {
+ if (tunnel->fd < 0) {
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}
}
- l2tp_tunnel_sock_put(sk);
-out:
+ /* Remove the tunnel struct from the tunnel list */
+ pn = l2tp_pernet(tunnel->l2tp_net);
+ spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+ list_del_rcu(&tunnel->list);
+ spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+ /* drop initial ref */
+ l2tp_tunnel_dec_refcount(tunnel);
+
+ /* drop workqueue ref */
l2tp_tunnel_dec_refcount(tunnel);
}
@@ -1478,70 +1416,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
{
struct l2tp_tunnel *tunnel = NULL;
int err;
- struct socket *sock = NULL;
- struct sock *sk = NULL;
- struct l2tp_net *pn;
enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
- /* Get the tunnel socket from the fd, which was opened by
- * the userspace L2TP daemon. If not specified, create a
- * kernel socket.
- */
- if (fd < 0) {
- err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id,
- cfg, &sock);
- if (err < 0)
- goto err;
- } else {
- sock = sockfd_lookup(fd, &err);
- if (!sock) {
- pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n",
- tunnel_id, fd, err);
- err = -EBADF;
- goto err;
- }
-
- /* Reject namespace mismatches */
- if (!net_eq(sock_net(sock->sk), net)) {
- pr_err("tunl %u: netns mismatch\n", tunnel_id);
- err = -EINVAL;
- goto err;
- }
- }
-
- sk = sock->sk;
-
if (cfg != NULL)
encap = cfg->encap;
- /* Quick sanity checks */
- switch (encap) {
- case L2TP_ENCAPTYPE_UDP:
- err = -EPROTONOSUPPORT;
- if (sk->sk_protocol != IPPROTO_UDP) {
- pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
- tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
- goto err;
- }
- break;
- case L2TP_ENCAPTYPE_IP:
- err = -EPROTONOSUPPORT;
- if (sk->sk_protocol != IPPROTO_L2TP) {
- pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
- tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
- goto err;
- }
- break;
- }
-
- /* Check if this socket has already been prepped */
- tunnel = l2tp_tunnel(sk);
- if (tunnel != NULL) {
- /* This socket has already been prepped */
- err = -EBUSY;
- goto err;
- }
-
tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
if (tunnel == NULL) {
err = -ENOMEM;
@@ -1558,85 +1437,125 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
rwlock_init(&tunnel->hlist_lock);
tunnel->acpt_newsess = true;
- /* The net we belong to */
+ if (cfg != NULL)
+ tunnel->debug = cfg->debug;
+
+ tunnel->encap = encap;
+
+ refcount_set(&tunnel->ref_count, 1);
+ tunnel->fd = fd;
+
+ /* Init delete workqueue struct */
+ INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);
+
+ INIT_LIST_HEAD(&tunnel->list);
+
+ err = 0;
+err:
+ if (tunnelp)
+ *tunnelp = tunnel;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+
+static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
+ enum l2tp_encap_type encap)
+{
+ if (!net_eq(sock_net(sk), net))
+ return -EINVAL;
+
+ if (sk->sk_type != SOCK_DGRAM)
+ return -EPROTONOSUPPORT;
+
+ if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) ||
+ (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
+ return -EPROTONOSUPPORT;
+
+ if (sk->sk_user_data)
+ return -EBUSY;
+
+ return 0;
+}
+
+int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+ struct l2tp_tunnel_cfg *cfg)
+{
+ struct l2tp_tunnel *tunnel_walk;
+ struct l2tp_net *pn;
+ struct socket *sock;
+ struct sock *sk;
+ int ret;
+
+ if (tunnel->fd < 0) {
+ ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
+ tunnel->peer_tunnel_id, cfg,
+ &sock);
+ if (ret < 0)
+ goto err;
+ } else {
+ sock = sockfd_lookup(tunnel->fd, &ret);
+ if (!sock)
+ goto err;
+
+ ret = l2tp_validate_socket(sock->sk, net, tunnel->encap);
+ if (ret < 0)
+ goto err_sock;
+ }
+
+ sk = sock->sk;
+
+ sock_hold(sk);
+ tunnel->sock = sk;
tunnel->l2tp_net = net;
+
pn = l2tp_pernet(net);
- if (cfg != NULL)
- tunnel->debug = cfg->debug;
+ spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+ list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
+ if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
+ spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- if (ipv6_addr_v4mapped(&np->saddr) &&
- ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
- struct inet_sock *inet = inet_sk(sk);
-
- tunnel->v4mapped = true;
- inet->inet_saddr = np->saddr.s6_addr32[3];
- inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3];
- inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3];
- } else {
- tunnel->v4mapped = false;
+ ret = -EEXIST;
+ goto err_sock;
}
}
-#endif
-
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- tunnel->encap = encap;
- if (encap == L2TP_ENCAPTYPE_UDP) {
- struct udp_tunnel_sock_cfg udp_cfg = { };
+ list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
+ spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- udp_cfg.sk_user_data = tunnel;
- udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
- udp_cfg.encap_rcv = l2tp_udp_encap_recv;
- udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ struct udp_tunnel_sock_cfg udp_cfg = {
+ .sk_user_data = tunnel,
+ .encap_type = UDP_ENCAP_L2TPINUDP,
+ .encap_rcv = l2tp_udp_encap_recv,
+ .encap_destroy = l2tp_udp_encap_destroy,
+ };
setup_udp_tunnel_sock(net, sock, &udp_cfg);
} else {
sk->sk_user_data = tunnel;
}
- /* Hook on the tunnel socket destructor so that we can cleanup
- * if the tunnel socket goes away.
- */
tunnel->old_sk_destruct = sk->sk_destruct;
sk->sk_destruct = &l2tp_tunnel_destruct;
- tunnel->sock = sk;
- tunnel->fd = fd;
- lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
-
+ lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class,
+ "l2tp_sock");
sk->sk_allocation = GFP_ATOMIC;
- /* Init delete workqueue struct */
- INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);
-
- /* Add tunnel to our list */
- INIT_LIST_HEAD(&tunnel->list);
-
- /* Bump the reference count. The tunnel context is deleted
- * only when this drops to zero. Must be done before list insertion
- */
- refcount_set(&tunnel->ref_count, 1);
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ if (tunnel->fd >= 0)
+ sockfd_put(sock);
- err = 0;
-err:
- if (tunnelp)
- *tunnelp = tunnel;
+ return 0;
- /* If tunnel's socket was created by the kernel, it doesn't
- * have a file.
- */
- if (sock && sock->file)
+err_sock:
+ if (tunnel->fd < 0)
+ sock_release(sock);
+ else
sockfd_put(sock);
-
- return err;
+err:
+ return ret;
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
/* This function is used by the netlink TUNNEL_DELETE command.
*/
@@ -1659,8 +1578,6 @@ void l2tp_session_free(struct l2tp_session *session)
if (tunnel) {
BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
- sock_put(tunnel->sock);
- session->tunnel = NULL;
l2tp_tunnel_dec_refcount(tunnel);
}
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9bbee90e9963..ba33cbec71eb 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -188,9 +188,6 @@ struct l2tp_tunnel {
struct sock *sock; /* Parent socket */
int fd; /* Parent fd, if tunnel socket
* was created by userspace */
-#if IS_ENABLED(CONFIG_IPV6)
- bool v4mapped;
-#endif
struct work_struct del_work;
@@ -214,27 +211,8 @@ static inline void *l2tp_session_priv(struct l2tp_session *session)
return &session->priv[0];
}
-static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk)
-{
- struct l2tp_tunnel *tunnel;
-
- if (sk == NULL)
- return NULL;
-
- sock_hold(sk);
- tunnel = (struct l2tp_tunnel *)(sk->sk_user_data);
- if (tunnel == NULL) {
- sock_put(sk);
- goto out;
- }
-
- BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-out:
- return tunnel;
-}
-
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
+void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
@@ -242,12 +220,14 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname);
-struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
struct l2tp_tunnel **tunnelp);
+int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+ struct l2tp_tunnel_cfg *cfg);
+
void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
struct l2tp_session *l2tp_session_create(int priv_size,
@@ -283,7 +263,7 @@ static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
{
if (refcount_dec_and_test(&tunnel->ref_count))
- kfree_rcu(tunnel, rcu);
+ l2tp_tunnel_free(tunnel);
}
/* Session reference counts. Incremented when code obtains a reference
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index ff61124fdf59..a9c05b2bc1b0 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -234,17 +234,13 @@ static void l2tp_ip_close(struct sock *sk, long timeout)
static void l2tp_ip_destroy_sock(struct sock *sk)
{
struct sk_buff *skb;
- struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel = sk->sk_user_data;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
kfree_skb(skb);
- if (tunnel) {
- l2tp_tunnel_closeall(tunnel);
- sock_put(sk);
- }
-
- sk_refcnt_debug_dec(sk);
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
}
static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -349,7 +345,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags)
}
static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -370,8 +366,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_conn_id = lsk->conn_id;
lsa->l2tp_addr.s_addr = addr;
}
- *uaddr_len = sizeof(*lsa);
- return 0;
+ return sizeof(*lsa);
}
static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 192344688c06..957369192ca1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -248,16 +248,14 @@ static void l2tp_ip6_close(struct sock *sk, long timeout)
static void l2tp_ip6_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel = sk->sk_user_data;
lock_sock(sk);
ip6_flush_pending_frames(sk);
release_sock(sk);
- if (tunnel) {
- l2tp_tunnel_closeall(tunnel);
- sock_put(sk);
- }
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
inet6_destroy_sock(sk);
}
@@ -421,7 +419,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags)
}
static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
struct sock *sk = sock->sk;
@@ -449,8 +447,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
}
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
lsa->l2tp_scope_id = sk->sk_bound_dev_if;
- *uaddr_len = sizeof(*lsa);
- return 0;
+ return sizeof(*lsa);
}
static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index e7ea9c4b89ff..b05dbd9ffcb2 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -236,12 +236,6 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
if (info->attrs[L2TP_ATTR_DEBUG])
cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel != NULL) {
- ret = -EEXIST;
- goto out;
- }
-
ret = -EINVAL;
switch (cfg.encap) {
case L2TP_ENCAPTYPE_UDP:
@@ -251,9 +245,19 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
break;
}
- if (ret >= 0)
- ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
- tunnel, L2TP_CMD_TUNNEL_CREATE);
+ if (ret < 0)
+ goto out;
+
+ l2tp_tunnel_inc_refcount(tunnel);
+ ret = l2tp_tunnel_register(tunnel, net, &cfg);
+ if (ret < 0) {
+ kfree(tunnel);
+ goto out;
+ }
+ ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel,
+ L2TP_CMD_TUNNEL_CREATE);
+ l2tp_tunnel_dec_refcount(tunnel);
+
out:
return ret;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 59f246d7b290..896bbca9bdaa 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -416,20 +416,28 @@ abort:
* Session (and tunnel control) socket create/destroy.
*****************************************************************************/
+static void pppol2tp_put_sk(struct rcu_head *head)
+{
+ struct pppol2tp_session *ps;
+
+ ps = container_of(head, typeof(*ps), rcu);
+ sock_put(ps->__sk);
+}
+
/* Called by l2tp_core when a session socket is being closed.
*/
static void pppol2tp_session_close(struct l2tp_session *session)
{
- struct sock *sk;
-
- BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+ struct pppol2tp_session *ps;
- sk = pppol2tp_session_get_sock(session);
- if (sk) {
- if (sk->sk_socket)
- inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
- sock_put(sk);
- }
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ if (ps->__sk)
+ call_rcu(&ps->rcu, pppol2tp_put_sk);
+ mutex_unlock(&ps->sk_lock);
}
/* Really kill the session socket. (Called from sock_put() if
@@ -449,14 +457,6 @@ static void pppol2tp_session_destruct(struct sock *sk)
}
}
-static void pppol2tp_put_sk(struct rcu_head *head)
-{
- struct pppol2tp_session *ps;
-
- ps = container_of(head, typeof(*ps), rcu);
- sock_put(ps->__sk);
-}
-
/* Called when the PPPoX socket (session) is closed.
*/
static int pppol2tp_release(struct socket *sock)
@@ -480,26 +480,17 @@ static int pppol2tp_release(struct socket *sock)
sock_orphan(sk);
sock->sk = NULL;
+ /* If the socket is associated with a session,
+ * l2tp_session_delete will call pppol2tp_session_close which
+ * will drop the session's ref on the socket.
+ */
session = pppol2tp_sock_to_session(sk);
-
- if (session != NULL) {
- struct pppol2tp_session *ps;
-
+ if (session) {
l2tp_session_delete(session);
-
- ps = l2tp_session_priv(session);
- mutex_lock(&ps->sk_lock);
- ps->__sk = rcu_dereference_protected(ps->sk,
- lockdep_is_held(&ps->sk_lock));
- RCU_INIT_POINTER(ps->sk, NULL);
- mutex_unlock(&ps->sk_lock);
- call_rcu(&ps->rcu, pppol2tp_put_sk);
-
- /* Rely on the sock_put() call at the end of the function for
- * dropping the reference held by pppol2tp_sock_to_session().
- * The last reference will be dropped by pppol2tp_put_sk().
- */
+ /* drop the ref obtained by pppol2tp_sock_to_session */
+ sock_put(sk);
}
+
release_sock(sk);
/* This will delete the session context via
@@ -707,6 +698,15 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
if (error < 0)
goto end;
+
+ l2tp_tunnel_inc_refcount(tunnel);
+ error = l2tp_tunnel_register(tunnel, sock_net(sk),
+ &tcfg);
+ if (error < 0) {
+ kfree(tunnel);
+ goto end;
+ }
+ drop_tunnel = true;
}
} else {
/* Error if we can't find the tunnel */
@@ -796,6 +796,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
out_no_ppp:
/* This is how we get the session context from the socket. */
+ sock_hold(sk);
sk->sk_user_data = session;
rcu_assign_pointer(ps->sk, sk);
mutex_unlock(&ps->sk_lock);
@@ -870,7 +871,7 @@ err:
/* getname() support.
*/
static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = 0;
int error = 0;
@@ -969,8 +970,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(uaddr, &sp, len);
}
- *usockaddr_len = len;
- error = 0;
+ error = len;
sock_put(sk);
end:
@@ -1751,7 +1751,7 @@ static __net_init int pppol2tp_init_net(struct net *net)
struct proc_dir_entry *pde;
int err = 0;
- pde = proc_create("pppol2tp", S_IRUGO, net->proc_net,
+ pde = proc_create("pppol2tp", 0444, net->proc_net,
&pppol2tp_proc_fops);
if (!pde) {
err = -ENOMEM;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c38d16f22d2a..01dcc0823d1f 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -971,7 +971,7 @@ release:
* Return the address information of a socket.
*/
static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddrlen, int peer)
+ int peer)
{
struct sockaddr_llc sllc;
struct sock *sk = sock->sk;
@@ -982,7 +982,6 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
lock_sock(sk);
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
- *uaddrlen = sizeof(sllc);
if (peer) {
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
@@ -1003,9 +1002,9 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
IFHWADDRLEN);
}
}
- rc = 0;
sllc.sllc_family = AF_LLC;
memcpy(uaddr, &sllc, sizeof(sllc));
+ rc = sizeof(sllc);
out:
release_sock(sk);
return rc;
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index f59648018060..163121192aca 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -389,7 +389,7 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb)
llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
- llc_conn_send_pdu(sk, skb);
+ rc = llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
return rc;
@@ -916,7 +916,7 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
- llc_conn_send_pdu(sk, skb);
+ rc = llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
return rc;
@@ -935,14 +935,17 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb)
{
struct llc_sock *llc = llc_sk(sk);
+ int ret;
if (llc->ack_must_be_send) {
- llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
+ ret = llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
llc->ack_must_be_send = 0 ;
llc->ack_pf = 0;
- } else
- llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
- return 0;
+ } else {
+ ret = llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
+ }
+
+ return ret;
}
/**
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 9177dbb16dce..110e32bcb399 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -30,7 +30,7 @@
#endif
static int llc_find_offset(int state, int ev_type);
-static void llc_conn_send_pdus(struct sock *sk);
+static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb);
static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
static int llc_exec_conn_trans_actions(struct sock *sk,
struct llc_conn_state_trans *trans,
@@ -193,11 +193,11 @@ out_skb_put:
return rc;
}
-void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
{
/* queue PDU to send to MAC layer */
skb_queue_tail(&sk->sk_write_queue, skb);
- llc_conn_send_pdus(sk);
+ return llc_conn_send_pdus(sk, skb);
}
/**
@@ -255,7 +255,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit)
if (howmany_resend > 0)
llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
/* any PDUs to re-send are queued up; start sending to MAC */
- llc_conn_send_pdus(sk);
+ llc_conn_send_pdus(sk, NULL);
out:;
}
@@ -296,7 +296,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit)
if (howmany_resend > 0)
llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
/* any PDUs to re-send are queued up; start sending to MAC */
- llc_conn_send_pdus(sk);
+ llc_conn_send_pdus(sk, NULL);
out:;
}
@@ -340,12 +340,16 @@ out:
/**
* llc_conn_send_pdus - Sends queued PDUs
* @sk: active connection
+ * @hold_skb: the skb held by caller, or NULL if does not care
*
- * Sends queued pdus to MAC layer for transmission.
+ * Sends queued pdus to MAC layer for transmission. When @hold_skb is
+ * NULL, always return 0. Otherwise, return 0 if @hold_skb is sent
+ * successfully, or 1 for failure.
*/
-static void llc_conn_send_pdus(struct sock *sk)
+static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
{
struct sk_buff *skb;
+ int ret = 0;
while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
@@ -357,10 +361,20 @@ static void llc_conn_send_pdus(struct sock *sk)
skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
if (!skb2)
break;
- skb = skb2;
+ dev_queue_xmit(skb2);
+ } else {
+ bool is_target = skb == hold_skb;
+ int rc;
+
+ if (is_target)
+ skb_get(skb);
+ rc = dev_queue_xmit(skb);
+ if (is_target)
+ ret = rc;
}
- dev_queue_xmit(skb);
}
+
+ return ret;
}
/**
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 66821e8a2b7a..62ea0aed94b4 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -249,11 +249,11 @@ int __init llc_proc_init(void)
if (!llc_proc_dir)
goto out;
- p = proc_create("socket", S_IRUGO, llc_proc_dir, &llc_seq_socket_fops);
+ p = proc_create("socket", 0444, llc_proc_dir, &llc_seq_socket_fops);
if (!p)
goto out_socket;
- p = proc_create("core", S_IRUGO, llc_proc_dir, &llc_seq_core_fops);
+ p = proc_create("core", 0444, llc_proc_dir, &llc_seq_core_fops);
if (!p)
goto out_core;
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index d90928f50226..a7f7b8ff4729 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap,
const struct llc_addr *laddr,
struct sk_buff *skb)
{
- int i = 0, count = 256 / sizeof(struct sock *);
- struct sock *sk, *stack[count];
+ int i = 0;
+ struct sock *sk;
+ struct sock *stack[256 / sizeof(struct sock *)];
struct llc_sock *llc;
struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex);
@@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap,
continue;
sock_hold(sk);
- if (i < count)
+ if (i < ARRAY_SIZE(stack))
stack[i++] = sk;
else {
llc_do_mcast(sap, skb, stack, i);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 1f3188d03840..e83c19d4c292 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -298,13 +298,23 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
+ struct tid_ampdu_rx *tid_rx;
+
ht_dbg_ratelimited(sta->sdata,
"updated AddBA Req from %pM on tid %u\n",
sta->sta.addr, tid);
/* We have no API to update the timeout value in the
- * driver so reject the timeout update.
+ * driver so reject the timeout update if the timeout
+ * changed. If if did not change, i.e., no real update,
+ * just reply with success.
*/
- status = WLAN_STATUS_REQUEST_DECLINED;
+ rcu_read_lock();
+ tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
+ if (tid_rx && tid_rx->timeout == timeout)
+ status = WLAN_STATUS_SUCCESS;
+ else
+ status = WLAN_STATUS_REQUEST_DECLINED;
+ rcu_read_unlock();
goto end;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f4195a0f0279..85dbaa891059 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -4,6 +4,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This file is GPLv2 as found in COPYING.
*/
@@ -925,6 +926,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
*/
sdata->control_port_protocol = params->crypto.control_port_ethertype;
sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt;
+ sdata->control_port_over_nl80211 =
+ params->crypto.control_port_over_nl80211;
sdata->encrypt_headroom = ieee80211_cs_headroom(sdata->local,
&params->crypto,
sdata->vif.type);
@@ -934,6 +937,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
params->crypto.control_port_ethertype;
vlan->control_port_no_encrypt =
params->crypto.control_port_no_encrypt;
+ vlan->control_port_over_nl80211 =
+ params->crypto.control_port_over_nl80211;
vlan->encrypt_headroom =
ieee80211_cs_headroom(sdata->local,
&params->crypto,
@@ -2019,6 +2024,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
if (err)
return err;
+ sdata->control_port_over_nl80211 = setup->control_port_over_nl80211;
+
/* can mesh use other SMPS modes? */
sdata->smps_mode = IEEE80211_SMPS_OFF;
sdata->needed_rx_chains = sdata->local->rx_chains;
@@ -2156,6 +2163,8 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
*/
p.uapsd = false;
+ ieee80211_regulatory_limit_wmm_params(sdata, &p, params->ac);
+
sdata->tx_conf[params->ac] = p;
if (drv_conf_tx(local, sdata, params->ac, &p)) {
wiphy_debug(local->hw.wiphy,
@@ -2313,6 +2322,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
memcpy(sdata->vif.bss_conf.mcast_rate, rate,
sizeof(int) * NUM_NL80211_BANDS);
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MCAST_RATE);
+
return 0;
}
@@ -2685,6 +2696,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
ieee80211_recalc_ps(local);
ieee80211_recalc_ps_vif(sdata);
+ ieee80211_check_fast_rx_iface(sdata);
return 0;
}
@@ -3785,4 +3797,5 @@ const struct cfg80211_ops mac80211_config_ops = {
.add_nan_func = ieee80211_add_nan_func,
.del_nan_func = ieee80211_del_nan_func,
.set_multicast_to_unicast = ieee80211_set_multicast_to_unicast,
+ .tx_control_port = ieee80211_tx_control_port,
};
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1f466d12a6bc..b5adf3625d16 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -212,6 +212,8 @@ static const char *hw_flag_names[] = {
FLAG(REPORTS_LOW_ACK),
FLAG(SUPPORTS_TX_FRAG),
FLAG(SUPPORTS_TDLS_BUFFER_STA),
+ FLAG(DEAUTH_NEED_MGD_TX_PREP),
+ FLAG(DOESNT_SUPPORT_QOS_NDP),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 444ea8d127fe..4105081dc1df 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -160,12 +160,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
sta->cparams.ecn ? "yes" : "no");
p += scnprintf(p,
bufsz+buf-p,
- "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n");
+ "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz+buf-p,
- "%d %d %u %u %u %u %u %u %u %u %u\n",
+ "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
txqi->tin.backlog_bytes,
@@ -176,7 +176,11 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
txqi->tin.overlimit,
txqi->tin.collisions,
txqi->tin.tx_bytes,
- txqi->tin.tx_packets);
+ txqi->tin.tx_packets,
+ txqi->flags,
+ txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
+ txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
+ txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
}
rcu_read_unlock();
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index d7523530d3f8..c78036a0ac94 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -466,6 +466,21 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
__ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST);
}
+enum nl80211_smps_mode
+ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps)
+{
+ switch (smps) {
+ case IEEE80211_SMPS_OFF:
+ return NL80211_SMPS_OFF;
+ case IEEE80211_SMPS_STATIC:
+ return NL80211_SMPS_STATIC;
+ case IEEE80211_SMPS_DYNAMIC:
+ return NL80211_SMPS_DYNAMIC;
+ default:
+ return NL80211_SMPS_OFF;
+ }
+}
+
int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
enum ieee80211_smps_mode smps, const u8 *da,
const u8 *bssid)
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index db07e0de9a03..6449a1c2283b 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1839,11 +1839,12 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED
| IEEE80211_HT_PARAM_RIFS_MODE;
- changed |= BSS_CHANGED_HT;
+ changed |= BSS_CHANGED_HT | BSS_CHANGED_MCAST_RATE;
ieee80211_bss_info_change_notify(sdata, changed);
sdata->smps_mode = IEEE80211_SMPS_OFF;
sdata->needed_rx_chains = local->rx_chains;
+ sdata->control_port_over_nl80211 = params->control_port_over_nl80211;
ieee80211_queue_work(&local->hw, &sdata->work);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ae9c33cd8ada..6372dbdadf53 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -4,6 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -899,6 +900,7 @@ struct ieee80211_sub_if_data {
u16 sequence_number;
__be16 control_port_protocol;
bool control_port_no_encrypt;
+ bool control_port_over_nl80211;
int encrypt_headroom;
atomic_t num_tx_queued;
@@ -1734,6 +1736,9 @@ void ieee80211_check_fast_xmit(struct sta_info *sta);
void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata);
void ieee80211_clear_fast_xmit(struct sta_info *sta);
+int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *buf, size_t len,
+ const u8 *dest, __be16 proto, bool unencrypted);
/* HT */
void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
@@ -1788,6 +1793,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs);
+enum nl80211_smps_mode
+ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps);
/* VHT */
void
@@ -1814,6 +1821,8 @@ void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_vht_cap *vht_cap);
void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
u16 vht_mask[NL80211_VHT_NSS_MAX]);
+enum nl80211_chan_width
+ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta);
/* Spectrum management */
void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1865,6 +1874,9 @@ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
int ieee80211_frame_duration(enum nl80211_band band, size_t len,
int rate, int erp, int short_preamble,
int shift);
+void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_tx_queue_params *qparam,
+ int ac);
void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
bool bss_notify, bool enable_qos);
void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5fe01f82df12..555e389b7dfa 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -519,6 +519,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
master->control_port_protocol;
sdata->control_port_no_encrypt =
master->control_port_no_encrypt;
+ sdata->control_port_over_nl80211 =
+ master->control_port_over_nl80211;
sdata->vif.cab_queue = master->vif.cab_queue;
memcpy(sdata->vif.hw_queue, master->vif.hw_queue,
sizeof(sdata->vif.hw_queue));
@@ -1324,8 +1326,7 @@ static void ieee80211_iface_work(struct work_struct *work)
mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, mgmt->sa);
if (sta) {
- u16 tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
__ieee80211_stop_rx_ba_session(
sta, tid, WLAN_BACK_RECIPIENT,
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index aee05ec3f7ea..ee0d0cc8dc3b 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -126,7 +126,7 @@ static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_sub_if_data *sdata = key->sdata;
struct sta_info *sta;
int ret = -EOPNOTSUPP;
@@ -162,7 +162,6 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
if (sta && !sta->uploaded)
goto out_unsupported;
- sdata = key->sdata;
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
/*
* The driver doesn't know anything about VLAN interfaces.
@@ -214,8 +213,11 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
/* all of these we can do in software - if driver can */
if (ret == 1)
return 0;
- if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
+ if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) {
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ return 0;
return -EINVAL;
+ }
return 0;
default:
return -EINVAL;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0785d04a80bc..9ea17afaa237 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -554,6 +554,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
NL80211_FEATURE_USERSPACE_MPM |
NL80211_FEATURE_FULL_AP_CLIENT_STATE;
wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA);
+ wiphy_ext_feature_set(wiphy,
+ NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211);
if (!ops->hw_scan)
wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -930,8 +932,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
IEEE80211_HT_CAP_SM_PS_SHIFT;
}
- /* if low-level driver supports AP, we also support VLAN */
- if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) {
+ /* if low-level driver supports AP, we also support VLAN.
+ * drivers advertising SW_CRYPTO_CONTROL should enable AP_VLAN
+ * based on their support to transmit SW encrypted packets.
+ */
+ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP) &&
+ !ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL)) {
hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN);
hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN);
}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6a381cbe1e33..d51da26e9c18 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -880,7 +880,8 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_HT |
BSS_CHANGED_BASIC_RATES |
- BSS_CHANGED_BEACON_INT;
+ BSS_CHANGED_BEACON_INT |
+ BSS_CHANGED_MCAST_RATE;
local->fif_other_bss++;
/* mesh ifaces must set allmulti to forward mcast traffic */
diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index 408649bd4702..37e172701a63 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -35,7 +35,7 @@ static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key,
da = ieee80211_get_DA(hdr);
sa = ieee80211_get_SA(hdr);
if (ieee80211_is_data_qos(hdr->frame_control))
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
else
tid = 0;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 39b660b9a908..69449db7e283 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -7,6 +7,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -896,7 +897,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
+ skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
+ !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
if (!skb)
return;
@@ -1785,12 +1787,14 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
params[ac].acm = acm;
params[ac].uapsd = uapsd;
- if (params[ac].cw_min > params[ac].cw_max) {
+ if (params->cw_min == 0 ||
+ params[ac].cw_min > params[ac].cw_max) {
sdata_info(sdata,
"AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n",
params[ac].cw_min, params[ac].cw_max, aci);
return false;
}
+ ieee80211_regulatory_limit_wmm_params(sdata, &params[ac], ac);
}
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
@@ -2008,9 +2012,20 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ieee80211_flush_queues(local, sdata, true);
/* deauthenticate/disassociate now */
- if (tx || frame_buf)
+ if (tx || frame_buf) {
+ /*
+ * In multi channel scenarios guarantee that the virtual
+ * interface is granted immediate airtime to transmit the
+ * deauthentication frame by calling mgd_prepare_tx, if the
+ * driver requested so.
+ */
+ if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
+ !ifmgd->have_beacon)
+ drv_mgd_prepare_tx(sdata->local, sdata);
+
ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
reason, tx, frame_buf);
+ }
/* flush out frame - make sure the deauth was actually sent */
if (tx)
@@ -2151,7 +2166,7 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
u16 tx_time)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u16 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
int ac = ieee80211_ac_from_tid(tid);
struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
unsigned long now = jiffies;
@@ -3292,82 +3307,14 @@ static const u64 care_about_ies =
(1ULL << WLAN_EID_HT_OPERATION) |
(1ULL << WLAN_EID_EXT_CHANSWITCH_ANN);
-static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_mgmt *mgmt, size_t len,
- struct ieee80211_rx_status *rx_status)
+static void ieee80211_handle_beacon_sig(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_if_managed *ifmgd,
+ struct ieee80211_bss_conf *bss_conf,
+ struct ieee80211_local *local,
+ struct ieee80211_rx_status *rx_status)
{
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
- size_t baselen;
- struct ieee802_11_elems elems;
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_chanctx_conf *chanctx_conf;
- struct ieee80211_channel *chan;
- struct sta_info *sta;
- u32 changed = 0;
- bool erp_valid;
- u8 erp_value = 0;
- u32 ncrc;
- u8 *bssid;
- u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
-
- sdata_assert_lock(sdata);
-
- /* Process beacon from the current BSS */
- baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
- if (baselen > len)
- return;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
- rcu_read_unlock();
- return;
- }
-
- if (rx_status->freq != chanctx_conf->def.chan->center_freq) {
- rcu_read_unlock();
- return;
- }
- chan = chanctx_conf->def.chan;
- rcu_read_unlock();
-
- if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
- ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
- ieee802_11_parse_elems(mgmt->u.beacon.variable,
- len - baselen, false, &elems);
-
- ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
- if (elems.tim && !elems.parse_error) {
- const struct ieee80211_tim_ie *tim_ie = elems.tim;
- ifmgd->dtim_period = tim_ie->dtim_period;
- }
- ifmgd->have_beacon = true;
- ifmgd->assoc_data->need_beacon = false;
- if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
- sdata->vif.bss_conf.sync_tsf =
- le64_to_cpu(mgmt->u.beacon.timestamp);
- sdata->vif.bss_conf.sync_device_ts =
- rx_status->device_timestamp;
- if (elems.tim)
- sdata->vif.bss_conf.sync_dtim_count =
- elems.tim->dtim_count;
- else
- sdata->vif.bss_conf.sync_dtim_count = 0;
- }
- /* continue assoc process */
- ifmgd->assoc_data->timeout = jiffies;
- ifmgd->assoc_data->timeout_started = true;
- run_again(sdata, ifmgd->assoc_data->timeout);
- return;
- }
-
- if (!ifmgd->associated ||
- !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
- return;
- bssid = ifmgd->associated->bssid;
-
/* Track average RSSI from the Beacon frames of the current AP */
+
if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
ewma_beacon_signal_init(&ifmgd->ave_beacon_signal);
@@ -3454,6 +3401,86 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
sig, GFP_KERNEL);
}
}
+}
+
+static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len,
+ struct ieee80211_rx_status *rx_status)
+{
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ size_t baselen;
+ struct ieee802_11_elems elems;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ struct ieee80211_channel *chan;
+ struct sta_info *sta;
+ u32 changed = 0;
+ bool erp_valid;
+ u8 erp_value = 0;
+ u32 ncrc;
+ u8 *bssid;
+ u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
+
+ sdata_assert_lock(sdata);
+
+ /* Process beacon from the current BSS */
+ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
+ if (baselen > len)
+ return;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (!chanctx_conf) {
+ rcu_read_unlock();
+ return;
+ }
+
+ if (rx_status->freq != chanctx_conf->def.chan->center_freq) {
+ rcu_read_unlock();
+ return;
+ }
+ chan = chanctx_conf->def.chan;
+ rcu_read_unlock();
+
+ if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
+ ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
+ ieee802_11_parse_elems(mgmt->u.beacon.variable,
+ len - baselen, false, &elems);
+
+ ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+ if (elems.tim && !elems.parse_error) {
+ const struct ieee80211_tim_ie *tim_ie = elems.tim;
+ ifmgd->dtim_period = tim_ie->dtim_period;
+ }
+ ifmgd->have_beacon = true;
+ ifmgd->assoc_data->need_beacon = false;
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
+ sdata->vif.bss_conf.sync_tsf =
+ le64_to_cpu(mgmt->u.beacon.timestamp);
+ sdata->vif.bss_conf.sync_device_ts =
+ rx_status->device_timestamp;
+ if (elems.tim)
+ sdata->vif.bss_conf.sync_dtim_count =
+ elems.tim->dtim_count;
+ else
+ sdata->vif.bss_conf.sync_dtim_count = 0;
+ }
+ /* continue assoc process */
+ ifmgd->assoc_data->timeout = jiffies;
+ ifmgd->assoc_data->timeout_started = true;
+ run_again(sdata, ifmgd->assoc_data->timeout);
+ return;
+ }
+
+ if (!ifmgd->associated ||
+ !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
+ return;
+ bssid = ifmgd->associated->bssid;
+
+ if (!(rx_status->flag & RX_FLAG_NO_SIGNAL_VAL))
+ ieee80211_handle_beacon_sig(sdata, ifmgd, bss_conf,
+ local, rx_status);
if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) {
mlme_dbg_ratelimited(sdata,
@@ -4830,6 +4857,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
sdata->control_port_protocol = req->crypto.control_port_ethertype;
sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt;
+ sdata->control_port_over_nl80211 =
+ req->crypto.control_port_over_nl80211;
sdata->encrypt_headroom = ieee80211_cs_headroom(local, &req->crypto,
sdata->vif.type);
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 9766c1cc4b0a..8221bc5582ab 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -690,7 +690,7 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
#ifdef CONFIG_MAC80211_DEBUGFS
mp->fixed_rate_idx = (u32) -1;
mp->dbg_fixed_rate = debugfs_create_u32("fixed_rate_idx",
- S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx);
+ 0666, debugfsdir, &mp->fixed_rate_idx);
#endif
minstrel_init_cck_rates(mp);
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 36fc971deb86..9ad7d63d3e5b 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -214,11 +214,11 @@ minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
{
struct minstrel_sta_info *mi = priv_sta;
- mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi,
- &minstrel_stat_fops);
+ mi->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, mi,
+ &minstrel_stat_fops);
- mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, dir,
- mi, &minstrel_stat_csv_fops);
+ mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, mi,
+ &minstrel_stat_csv_fops);
}
void
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 4a5bdad9f303..fb586b6e5d49 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -669,7 +669,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
return;
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
if (likely(sta->ampdu_mlme.tid_tx[tid]))
return;
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 7d969e300fb3..bfcc03152dc6 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -303,10 +303,10 @@ minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
{
struct minstrel_ht_sta_priv *msp = priv_sta;
- msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp,
- &minstrel_ht_stat_fops);
- msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO,
- dir, msp, &minstrel_ht_stat_csv_fops);
+ msp->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, msp,
+ &minstrel_ht_stat_fops);
+ msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, msp,
+ &minstrel_ht_stat_csv_fops);
}
void
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fd580614085b..03102aff0953 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -439,6 +439,10 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN)
+ flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_EOF_BIT)
+ flags |= IEEE80211_RADIOTAP_AMPDU_EOF;
put_unaligned_le16(flags, pos);
pos += 2;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
@@ -1185,7 +1189,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
ack_policy = *ieee80211_get_qos_ctl(hdr) &
IEEE80211_QOS_CTL_ACK_POLICY_MASK;
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_agg_rx) {
@@ -1524,9 +1528,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
ieee80211_has_pm(hdr->frame_control) &&
(ieee80211_is_data_qos(hdr->frame_control) ||
ieee80211_is_qos_nullfunc(hdr->frame_control))) {
- u8 tid;
-
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u8 tid = ieee80211_get_tid(hdr);
ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid);
}
@@ -2243,6 +2245,32 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
return true;
}
+static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
+ struct ieee80211_rx_data *rx)
+{
+ struct ieee80211_sub_if_data *sdata = rx->sdata;
+ struct net_device *dev = sdata->dev;
+
+ if (unlikely((skb->protocol == sdata->control_port_protocol ||
+ skb->protocol == cpu_to_be16(ETH_P_PREAUTH)) &&
+ sdata->control_port_over_nl80211)) {
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+ bool noencrypt = status->flag & RX_FLAG_DECRYPTED;
+ struct ethhdr *ehdr = eth_hdr(skb);
+
+ cfg80211_rx_control_port(dev, skb->data, skb->len,
+ ehdr->h_source,
+ be16_to_cpu(skb->protocol), noencrypt);
+ dev_kfree_skb(skb);
+ } else {
+ /* deliver to local stack */
+ if (rx->napi)
+ napi_gro_receive(rx->napi, skb);
+ else
+ netif_receive_skb(skb);
+ }
+}
+
/*
* requires that rx->skb is a frame with ethernet header
*/
@@ -2327,13 +2355,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
#endif
if (skb) {
- /* deliver to local stack */
skb->protocol = eth_type_trans(skb, dev);
memset(skb->cb, 0, sizeof(skb->cb));
- if (rx->napi)
- napi_gro_receive(rx->napi, skb);
- else
- netif_receive_skb(skb);
+
+ ieee80211_deliver_skb_to_local_stack(skb, rx);
}
if (xmit_skb) {
@@ -2351,39 +2376,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
}
static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
{
struct net_device *dev = rx->sdata->dev;
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
__le16 fc = hdr->frame_control;
struct sk_buff_head frame_list;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
struct ethhdr ethhdr;
const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
- if (unlikely(!ieee80211_is_data(fc)))
- return RX_CONTINUE;
-
- if (unlikely(!ieee80211_is_data_present(fc)))
- return RX_DROP_MONITOR;
-
- if (!(status->rx_flags & IEEE80211_RX_AMSDU))
- return RX_CONTINUE;
-
if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
- switch (rx->sdata->vif.type) {
- case NL80211_IFTYPE_AP_VLAN:
- if (!rx->sdata->u.vlan.sta)
- return RX_DROP_UNUSABLE;
- break;
- case NL80211_IFTYPE_STATION:
- if (!rx->sdata->u.mgd.use_4addr)
- return RX_DROP_UNUSABLE;
- break;
- default:
- return RX_DROP_UNUSABLE;
- }
check_da = NULL;
check_sa = NULL;
} else switch (rx->sdata->vif.type) {
@@ -2403,15 +2406,13 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
break;
}
- if (is_multicast_ether_addr(hdr->addr1))
- return RX_DROP_UNUSABLE;
-
skb->dev = dev;
__skb_queue_head_init(&frame_list);
if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
rx->sdata->vif.addr,
- rx->sdata->vif.type))
+ rx->sdata->vif.type,
+ data_offset))
return RX_DROP_UNUSABLE;
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2433,6 +2434,44 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
return RX_QUEUED;
}
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+{
+ struct sk_buff *skb = rx->skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ __le16 fc = hdr->frame_control;
+
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data(fc)))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data_present(fc)))
+ return RX_DROP_MONITOR;
+
+ if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+ switch (rx->sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ if (!rx->sdata->u.vlan.sta)
+ return RX_DROP_UNUSABLE;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (!rx->sdata->u.mgd.use_4addr)
+ return RX_DROP_UNUSABLE;
+ break;
+ default:
+ return RX_DROP_UNUSABLE;
+ }
+ }
+
+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_UNUSABLE;
+
+ return __ieee80211_rx_h_amsdu(rx, 0);
+}
+
#ifdef CONFIG_MAC80211_MESH
static ieee80211_rx_result
ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
@@ -2533,11 +2572,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
fwd_skb = skb_copy_expand(skb, local->tx_headroom +
sdata->encrypt_headroom, 0, GFP_ATOMIC);
- if (!fwd_skb) {
- net_info_ratelimited("%s: failed to clone mesh frame\n",
- sdata->name);
+ if (!fwd_skb)
goto out;
- }
fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data;
fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
@@ -2791,7 +2827,8 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
!(rx->flags & IEEE80211_RX_BEACON_REPORTED)) {
int sig = 0;
- if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM))
+ if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) &&
+ !(status->flag & RX_FLAG_NO_SIGNAL_VAL))
sig = status->signal;
cfg80211_report_obss_beacon(rx->local->hw.wiphy,
@@ -2848,6 +2885,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
case WLAN_HT_ACTION_SMPS: {
struct ieee80211_supported_band *sband;
enum ieee80211_smps_mode smps_mode;
+ struct sta_opmode_info sta_opmode = {};
/* convert to HT capability */
switch (mgmt->u.action.u.ht_smps.smps_control) {
@@ -2868,17 +2906,25 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
if (rx->sta->sta.smps_mode == smps_mode)
goto handled;
rx->sta->sta.smps_mode = smps_mode;
+ sta_opmode.smps_mode =
+ ieee80211_smps_mode_to_smps_mode(smps_mode);
+ sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
sband = rx->local->hw.wiphy->bands[status->band];
rate_control_rate_update(local, sband, rx->sta,
IEEE80211_RC_SMPS_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ rx->sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
goto handled;
}
case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
struct ieee80211_supported_band *sband;
u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
enum ieee80211_sta_rx_bandwidth max_bw, new_bw;
+ struct sta_opmode_info sta_opmode = {};
/* If it doesn't support 40 MHz it can't change ... */
if (!(rx->sta->sta.ht_cap.cap &
@@ -2899,9 +2945,16 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
rx->sta->sta.bandwidth = new_bw;
sband = rx->local->hw.wiphy->bands[status->band];
+ sta_opmode.bw =
+ ieee80211_sta_rx_bw_to_chan_width(rx->sta);
+ sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
rate_control_rate_update(local, sband, rx->sta,
IEEE80211_RC_BW_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ rx->sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
goto handled;
}
default:
@@ -3118,7 +3171,8 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
* it transmitted were processed or returned.
*/
- if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM))
+ if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) &&
+ !(status->flag & RX_FLAG_NO_SIGNAL_VAL))
sig = status->signal;
if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
@@ -3731,15 +3785,6 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- /* 4-addr is harder to deal with, later maybe */
- if (sdata->u.mgd.use_4addr)
- goto clear;
- /* software powersave is a huge mess, avoid all of it */
- if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
- goto clear;
- if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
- !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
- goto clear;
if (sta->sta.tdls) {
fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
@@ -3751,6 +3796,23 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
fastrx.expected_ds_bits =
cpu_to_le16(IEEE80211_FCTL_FROMDS);
}
+
+ if (sdata->u.mgd.use_4addr && !sta->sta.tdls) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_TODS);
+ fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ }
+
+ if (!sdata->u.mgd.powersave)
+ break;
+
+ /* software powersave is a huge mess, avoid all of it */
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
+ goto clear;
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+ !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
+ goto clear;
break;
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
@@ -3767,6 +3829,15 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN ||
!sdata->u.vlan.sta);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+ sdata->u.vlan.sta) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_FROMDS);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ fastrx.internal_forward = 0;
+ }
+
break;
default:
goto clear;
@@ -3865,7 +3936,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct sta_info *sta = rx->sta;
int orig_len = skb->len;
- int snap_offs = ieee80211_hdrlen(hdr->frame_control);
+ int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ int snap_offs = hdrlen;
struct {
u8 snap[sizeof(rfc1042_header)];
__be16 proto;
@@ -3896,10 +3968,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
(status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS)
return false;
- /* we don't deal with A-MSDU deaggregation here */
- if (status->rx_flags & IEEE80211_RX_AMSDU)
- return false;
-
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return false;
@@ -3921,7 +3989,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS |
IEEE80211_FCTL_TODS)) !=
fast_rx->expected_ds_bits)
- goto drop;
+ return false;
/* assign the key to drop unencrypted frames (later)
* and strip the IV/MIC if necessary
@@ -3931,21 +3999,24 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
snap_offs += IEEE80211_CCMP_HDR_LEN;
}
- if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
- goto drop;
- payload = (void *)(skb->data + snap_offs);
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
+ if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
+ goto drop;
- if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
- return false;
+ payload = (void *)(skb->data + snap_offs);
- /* Don't handle these here since they require special code.
- * Accept AARP and IPX even though they should come with a
- * bridge-tunnel header - but if we get them this way then
- * there's little point in discarding them.
- */
- if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
- payload->proto == fast_rx->control_port_protocol))
- return false;
+ if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
+ return false;
+
+ /* Don't handle these here since they require special code.
+ * Accept AARP and IPX even though they should come with a
+ * bridge-tunnel header - but if we get them this way then
+ * there's little point in discarding them.
+ */
+ if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
+ payload->proto == fast_rx->control_port_protocol))
+ return false;
+ }
/* after this point, don't punt to the slowpath! */
@@ -3959,12 +4030,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
}
/* statistics part of ieee80211_rx_h_sta_process() */
- stats->last_rx = jiffies;
- stats->last_rate = sta_stats_encode_rate(status);
-
- stats->fragments++;
- stats->packets++;
-
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
if (!fast_rx->uses_rss)
@@ -3993,6 +4058,20 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
if (rx->key && !ieee80211_has_protected(hdr->frame_control))
goto drop;
+ if (status->rx_flags & IEEE80211_RX_AMSDU) {
+ if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) !=
+ RX_QUEUED)
+ goto drop;
+
+ return true;
+ }
+
+ stats->last_rx = jiffies;
+ stats->last_rate = sta_stats_encode_rate(status);
+
+ stats->fragments++;
+ stats->packets++;
+
/* do the header conversion - first grab the addresses */
ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index ef2becaade50..a3b1bcc2b461 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -73,7 +73,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
bool signal_valid;
struct ieee80211_sub_if_data *scan_sdata;
- if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
+ if (rx_status->flag & RX_FLAG_NO_SIGNAL_VAL)
+ bss_meta.signal = 0; /* invalid signal indication */
+ else if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
bss_meta.signal = rx_status->signal * 100;
else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC))
bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index af0b608ee8ed..655c3d8b0d80 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2288,6 +2288,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
sinfo->expected_throughput = thr;
}
+
+ if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
+ sta->status_stats.ack_signal_filled) {
+ sinfo->ack_signal = sta->status_stats.last_ack_signal;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
+ }
}
u32 sta_get_expected_throughput(struct sta_info *sta)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index cd53619435b6..f64eb86ca64b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -548,6 +548,8 @@ struct sta_info {
u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
unsigned long last_ack;
+ s8 last_ack_signal;
+ bool ack_signal_filled;
} status_stats;
/* Updated from TX path only, no locking requirements */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index da7427a41529..743e89c5926c 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -187,9 +187,16 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
struct ieee80211_mgmt *mgmt = (void *) skb->data;
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb);
- if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
sta->status_stats.last_ack = jiffies;
+ if (txinfo->status.is_valid_ack_signal) {
+ sta->status_stats.last_ack_signal =
+ (s8)txinfo->status.ack_signal;
+ sta->status_stats.ack_signal_filled = true;
+ }
+ }
if (ieee80211_is_data_qos(mgmt->frame_control)) {
struct ieee80211_hdr *hdr = (void *) skb->data;
@@ -487,6 +494,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
ieee80211_is_qos_nullfunc(hdr->frame_control))
cfg80211_probe_status(sdata->dev, hdr->addr1,
cookie, acked,
+ info->status.ack_signal,
+ info->status.is_valid_ack_signal,
GFP_ATOMIC);
else
cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 25904af38839..535de3161a78 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -797,7 +797,6 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
- u8 *qc;
int tid;
/*
@@ -844,9 +843,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
return TX_CONTINUE;
/* include per-STA, per-TID sequence counter */
-
- qc = ieee80211_get_qos_ctl(hdr);
- tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tx->sta->tx_stats.msdu[tid]++;
hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
@@ -1158,7 +1155,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
int tid;
- u8 *qc;
memset(tx, 0, sizeof(*tx));
tx->skb = skb;
@@ -1198,8 +1194,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
!ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) {
struct tid_ampdu_tx *tid_tx;
- qc = ieee80211_get_qos_ctl(hdr);
- tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
if (tid_tx) {
@@ -1921,7 +1916,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_hdr *hdr;
int headroom;
bool may_encrypt;
@@ -3574,6 +3569,14 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
if (!IS_ERR_OR_NULL(sta)) {
struct ieee80211_fast_tx *fast_tx;
+ /* We need a bit of data queued to build aggregates properly, so
+ * instruct the TCP stack to allow more than a single ms of data
+ * to be queued in the stack. The value is a bit-shift of 1
+ * second, so 8 is ~4ms of queued data. Only affects local TCP
+ * sockets.
+ */
+ sk_pacing_shift_update(skb->sk, 8);
+
fast_tx = rcu_dereference(sta->fast_tx);
if (fast_tx &&
@@ -4754,3 +4757,49 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
ieee80211_xmit(sdata, NULL, skb);
local_bh_enable();
}
+
+int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *buf, size_t len,
+ const u8 *dest, __be16 proto, bool unencrypted)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct sk_buff *skb;
+ struct ethhdr *ehdr;
+ u32 flags;
+
+ /* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE
+ * or Pre-Authentication
+ */
+ if (proto != sdata->control_port_protocol &&
+ proto != cpu_to_be16(ETH_P_PREAUTH))
+ return -EINVAL;
+
+ if (unencrypted)
+ flags = IEEE80211_TX_INTFL_DONT_ENCRYPT;
+ else
+ flags = 0;
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ sizeof(struct ethhdr) + len);
+ if (!skb)
+ return -ENOMEM;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom + sizeof(struct ethhdr));
+
+ skb_put_data(skb, buf, len);
+
+ ehdr = skb_push(skb, sizeof(struct ethhdr));
+ memcpy(ehdr->h_dest, dest, ETH_ALEN);
+ memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN);
+ ehdr->h_proto = proto;
+
+ skb->dev = dev;
+ skb->protocol = htons(ETH_P_802_3);
+ skb_reset_network_header(skb);
+ skb_reset_mac_header(skb);
+
+ __ieee80211_subif_start_xmit(skb, skb->dev, flags);
+
+ return 0;
+}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1f82191ce601..11f9cfc016d9 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -5,6 +5,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1113,6 +1114,48 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
return crc;
}
+void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_tx_queue_params
+ *qparam, int ac)
+{
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ const struct ieee80211_reg_rule *rrule;
+ struct ieee80211_wmm_ac *wmm_ac;
+ u16 center_freq = 0;
+
+ if (sdata->vif.type != NL80211_IFTYPE_AP &&
+ sdata->vif.type != NL80211_IFTYPE_STATION)
+ return;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (chanctx_conf)
+ center_freq = chanctx_conf->def.chan->center_freq;
+
+ if (!center_freq) {
+ rcu_read_unlock();
+ return;
+ }
+
+ rrule = freq_reg_info(sdata->wdev.wiphy, MHZ_TO_KHZ(center_freq));
+
+ if (IS_ERR_OR_NULL(rrule) || !rrule->wmm_rule) {
+ rcu_read_unlock();
+ return;
+ }
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ wmm_ac = &rrule->wmm_rule->ap[ac];
+ else
+ wmm_ac = &rrule->wmm_rule->client[ac];
+ qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min);
+ qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max);
+ qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn);
+ qparam->txop = !qparam->txop ? wmm_ac->cot / 32 :
+ min_t(u16, qparam->txop, wmm_ac->cot / 32);
+ rcu_read_unlock();
+}
+
void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
bool bss_notify, bool enable_qos)
{
@@ -1206,6 +1249,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
break;
}
}
+ ieee80211_regulatory_limit_wmm_params(sdata, &qparam, ac);
qparam.uapsd = false;
@@ -1968,7 +2012,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
BSS_CHANGED_CQM |
BSS_CHANGED_QOS |
BSS_CHANGED_IDLE |
- BSS_CHANGED_TXPOWER;
+ BSS_CHANGED_TXPOWER |
+ BSS_CHANGED_MCAST_RATE;
if (sdata->vif.mu_mimo_owner)
changed |= BSS_CHANGED_MU_GROUPS;
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index b9276ac849fa..259325cbcc31 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -358,6 +358,36 @@ enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta)
return NL80211_CHAN_WIDTH_80;
}
+enum nl80211_chan_width
+ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta)
+{
+ enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.bandwidth;
+ struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ u32 cap_width;
+
+ switch (cur_bw) {
+ case IEEE80211_STA_RX_BW_20:
+ if (!sta->sta.ht_cap.ht_supported)
+ return NL80211_CHAN_WIDTH_20_NOHT;
+ else
+ return NL80211_CHAN_WIDTH_20;
+ case IEEE80211_STA_RX_BW_40:
+ return NL80211_CHAN_WIDTH_40;
+ case IEEE80211_STA_RX_BW_80:
+ return NL80211_CHAN_WIDTH_80;
+ case IEEE80211_STA_RX_BW_160:
+ cap_width =
+ vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+
+ if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)
+ return NL80211_CHAN_WIDTH_160;
+
+ return NL80211_CHAN_WIDTH_80P80;
+ default:
+ return NL80211_CHAN_WIDTH_20;
+ }
+}
+
enum ieee80211_sta_rx_bandwidth
ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
{
@@ -447,6 +477,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
enum nl80211_band band)
{
enum ieee80211_sta_rx_bandwidth new_bw;
+ struct sta_opmode_info sta_opmode = {};
u32 changed = 0;
u8 nss;
@@ -460,7 +491,9 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
if (sta->sta.rx_nss != nss) {
sta->sta.rx_nss = nss;
+ sta_opmode.rx_nss = nss;
changed |= IEEE80211_RC_NSS_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
}
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
@@ -481,9 +514,15 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
new_bw = ieee80211_sta_cur_vht_bw(sta);
if (new_bw != sta->sta.bandwidth) {
sta->sta.bandwidth = new_bw;
+ sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(sta);
changed |= IEEE80211_RC_BW_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
}
+ if (sta_opmode.changed)
+ cfg80211_sta_opmode_change_notify(sdata->dev, sta->addr,
+ &sta_opmode, GFP_KERNEL);
+
return changed;
}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 785056cb76f6..58d0b258b684 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -340,7 +340,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
a4_included = ieee80211_has_a4(hdr->frame_control);
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
@@ -601,8 +601,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
aad[23] = 0;
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
@@ -867,8 +866,7 @@ ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 2c8a43d3607f..df855c33daf2 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -33,7 +33,7 @@
/* Tracing for driver callbacks */
-DECLARE_EVENT_CLASS(local_only_evt,
+DECLARE_EVENT_CLASS(local_only_evt4,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local),
TP_STRUCT__entry(
@@ -45,7 +45,7 @@ DECLARE_EVENT_CLASS(local_only_evt,
TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
);
-DEFINE_EVENT(local_only_evt, 802154_drv_return_void,
+DEFINE_EVENT(local_only_evt4, 802154_drv_return_void,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
@@ -65,12 +65,12 @@ TRACE_EVENT(802154_drv_return_int,
__entry->ret)
);
-DEFINE_EVENT(local_only_evt, 802154_drv_start,
+DEFINE_EVENT(local_only_evt4, 802154_drv_start,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
-DEFINE_EVENT(local_only_evt, 802154_drv_stop,
+DEFINE_EVENT(local_only_evt4, 802154_drv_stop,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index e545a3c9365f..7a4de6d618b1 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -122,7 +122,7 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->len <= mtu)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
diff --git a/net/ncsi/Makefile b/net/ncsi/Makefile
index dd12b564f2e7..436ef68331f2 100644
--- a/net/ncsi/Makefile
+++ b/net/ncsi/Makefile
@@ -1,4 +1,4 @@
#
# Makefile for NCSI API
#
-obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o
+obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o ncsi-netlink.o
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index d30f7bd741d0..8da84312cd3b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -276,6 +276,8 @@ struct ncsi_dev_priv {
unsigned int package_num; /* Number of packages */
struct list_head packages; /* List of packages */
struct ncsi_channel *hot_channel; /* Channel was ever active */
+ struct ncsi_package *force_package; /* Force a specific package */
+ struct ncsi_channel *force_channel; /* Force a specific channel */
struct ncsi_request requests[256]; /* Request table */
unsigned int request_id; /* Last used request ID */
#define NCSI_REQ_START_IDX 1
@@ -318,6 +320,7 @@ extern spinlock_t ncsi_dev_lock;
list_for_each_entry_rcu(nc, &np->channels, node)
/* Resources */
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index);
int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data);
int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data);
int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index c989211bbabc..c3695ba0cf94 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -12,7 +12,6 @@
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/netlink.h>
#include <net/ncsi.h>
#include <net/net_namespace.h>
@@ -23,6 +22,7 @@
#include "internal.h"
#include "ncsi-pkt.h"
+#include "ncsi-netlink.h"
LIST_HEAD(ncsi_dev_list);
DEFINE_SPINLOCK(ncsi_dev_lock);
@@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table)
return sizes[table];
}
-static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
{
struct ncsi_channel_filter *ncf;
int size;
@@ -965,20 +965,37 @@ error:
static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
{
- struct ncsi_package *np;
- struct ncsi_channel *nc, *found, *hot_nc;
+ struct ncsi_package *np, *force_package;
+ struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
struct ncsi_channel_mode *ncm;
unsigned long flags;
spin_lock_irqsave(&ndp->lock, flags);
hot_nc = ndp->hot_channel;
+ force_channel = ndp->force_channel;
+ force_package = ndp->force_package;
spin_unlock_irqrestore(&ndp->lock, flags);
+ /* Force a specific channel whether or not it has link if we have been
+ * configured to do so
+ */
+ if (force_package && force_channel) {
+ found = force_channel;
+ ncm = &found->modes[NCSI_MODE_LINK];
+ if (!(ncm->data[2] & 0x1))
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Channel %u forced, but it is link down\n",
+ found->id);
+ goto out;
+ }
+
/* The search is done once an inactive channel with up
* link is found.
*/
found = NULL;
NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (ndp->force_package && np != ndp->force_package)
+ continue;
NCSI_FOR_EACH_CHANNEL(np, nc) {
spin_lock_irqsave(&nc->lock, flags);
@@ -1594,6 +1611,9 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
ndp->ptype.dev = dev;
dev_add_pack(&ndp->ptype);
+ /* Set up generic netlink interface */
+ ncsi_init_netlink(dev);
+
return nd;
}
EXPORT_SYMBOL_GPL(ncsi_register_dev);
@@ -1673,6 +1693,8 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
#endif
spin_unlock_irqrestore(&ncsi_dev_lock, flags);
+ ncsi_unregister_netlink(nd->dev);
+
kfree(ndp);
}
EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
new file mode 100644
index 000000000000..8d7e849d4825
--- /dev/null
+++ b/net/ncsi/ncsi-netlink.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <net/genetlink.h>
+#include <net/ncsi.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <uapi/linux/ncsi.h>
+
+#include "internal.h"
+#include "ncsi-netlink.h"
+
+static struct genl_family ncsi_genl_family;
+
+static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = {
+ [NCSI_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [NCSI_ATTR_PACKAGE_LIST] = { .type = NLA_NESTED },
+ [NCSI_ATTR_PACKAGE_ID] = { .type = NLA_U32 },
+ [NCSI_ATTR_CHANNEL_ID] = { .type = NLA_U32 },
+};
+
+static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
+{
+ struct ncsi_dev_priv *ndp;
+ struct net_device *dev;
+ struct ncsi_dev *nd;
+ struct ncsi_dev;
+
+ if (!net)
+ return NULL;
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev) {
+ pr_err("NCSI netlink: No device for ifindex %u\n", ifindex);
+ return NULL;
+ }
+
+ nd = ncsi_find_dev(dev);
+ ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
+
+ dev_put(dev);
+ return ndp;
+}
+
+static int ncsi_write_channel_info(struct sk_buff *skb,
+ struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *nc)
+{
+ struct nlattr *vid_nest;
+ struct ncsi_channel_filter *ncf;
+ struct ncsi_channel_mode *m;
+ u32 *data;
+ int i;
+
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id);
+ m = &nc->modes[NCSI_MODE_LINK];
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
+ if (nc->state == NCSI_CHANNEL_ACTIVE)
+ nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
+ if (ndp->force_channel == nc)
+ nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
+
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+ nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
+
+ vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
+ if (!vid_nest)
+ return -ENOMEM;
+ ncf = nc->filters[NCSI_FILTER_VLAN];
+ i = -1;
+ if (ncf) {
+ while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total,
+ i + 1)) < ncf->total) {
+ data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, i);
+ /* Uninitialised channels will have 'zero' vlan ids */
+ if (!data || !*data)
+ continue;
+ nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID,
+ *(u16 *)data);
+ }
+ }
+ nla_nest_end(skb, vid_nest);
+
+ return 0;
+}
+
+static int ncsi_write_package_info(struct sk_buff *skb,
+ struct ncsi_dev_priv *ndp, unsigned int id)
+{
+ struct nlattr *pnest, *cnest, *nest;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ bool found;
+ int rc;
+
+ if (id > ndp->package_num) {
+ netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id);
+ return -ENODEV;
+ }
+
+ found = false;
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (np->id != id)
+ continue;
+ pnest = nla_nest_start(skb, NCSI_PKG_ATTR);
+ if (!pnest)
+ return -ENOMEM;
+ nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ if (ndp->force_package == np)
+ nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
+ cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
+ if (!cnest) {
+ nla_nest_cancel(skb, pnest);
+ return -ENOMEM;
+ }
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR);
+ if (!nest) {
+ nla_nest_cancel(skb, cnest);
+ nla_nest_cancel(skb, pnest);
+ return -ENOMEM;
+ }
+ rc = ncsi_write_channel_info(skb, ndp, nc);
+ if (rc) {
+ nla_nest_cancel(skb, nest);
+ nla_nest_cancel(skb, cnest);
+ nla_nest_cancel(skb, pnest);
+ return rc;
+ }
+ nla_nest_end(skb, nest);
+ }
+ nla_nest_end(skb, cnest);
+ nla_nest_end(skb, pnest);
+ found = true;
+ }
+
+ if (!found)
+ return -ENODEV;
+
+ return 0;
+}
+
+static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned int package_id;
+ struct sk_buff *skb;
+ struct nlattr *attr;
+ void *hdr;
+ int rc;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(genl_info_net(info),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+ if (!hdr) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+
+ attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+ if (!attr) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+ rc = ncsi_write_package_info(skb, ndp, package_id);
+
+ if (rc) {
+ nla_nest_cancel(skb, attr);
+ goto err;
+ }
+
+ nla_nest_end(skb, attr);
+
+ genlmsg_end(skb, hdr);
+ return genlmsg_reply(skb, info);
+
+err:
+ genlmsg_cancel(skb, hdr);
+ kfree_skb(skb);
+ return rc;
+}
+
+static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *attrs[NCSI_ATTR_MAX];
+ struct ncsi_package *np, *package;
+ struct ncsi_dev_priv *ndp;
+ unsigned int package_id;
+ struct nlattr *attr;
+ void *hdr;
+ int rc;
+
+ rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX,
+ ncsi_genl_policy, NULL);
+ if (rc)
+ return rc;
+
+ if (!attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(get_net(sock_net(skb->sk)),
+ nla_get_u32(attrs[NCSI_ATTR_IFINDEX]));
+
+ if (!ndp)
+ return -ENODEV;
+
+ package_id = cb->args[0];
+ package = NULL;
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ if (np->id == package_id)
+ package = np;
+
+ if (!package)
+ return 0; /* done */
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+ if (!hdr) {
+ rc = -EMSGSIZE;
+ goto err;
+ }
+
+ attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+ rc = ncsi_write_package_info(skb, ndp, package->id);
+ if (rc) {
+ nla_nest_cancel(skb, attr);
+ goto err;
+ }
+
+ nla_nest_end(skb, attr);
+ genlmsg_end(skb, hdr);
+
+ cb->args[0] = package_id + 1;
+
+ return skb->len;
+err:
+ genlmsg_cancel(skb, hdr);
+ return rc;
+}
+
+static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_package *np, *package;
+ struct ncsi_channel *nc, *channel;
+ u32 package_id, channel_id;
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+ package = NULL;
+
+ spin_lock_irqsave(&ndp->lock, flags);
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ if (np->id == package_id)
+ package = np;
+ if (!package) {
+ /* The user has set a package that does not exist */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return -ERANGE;
+ }
+
+ channel = NULL;
+ if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+ /* Allow any channel */
+ channel_id = NCSI_RESERVED_CHANNEL;
+ } else {
+ channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+ NCSI_FOR_EACH_CHANNEL(package, nc)
+ if (nc->id == channel_id)
+ channel = nc;
+ }
+
+ if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
+ /* The user has set a channel that does not exist on this
+ * package
+ */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
+ channel_id);
+ return -ERANGE;
+ }
+
+ ndp->force_package = package;
+ ndp->force_channel = channel;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
+ package_id, channel_id,
+ channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+
+ /* Bounce the NCSI channel to set changes */
+ ncsi_stop_dev(&ndp->ndev);
+ ncsi_start_dev(&ndp->ndev);
+
+ return 0;
+}
+
+static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ /* Clear any override */
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->force_package = NULL;
+ ndp->force_channel = NULL;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
+
+ /* Bounce the NCSI channel to set changes */
+ ncsi_stop_dev(&ndp->ndev);
+ ncsi_start_dev(&ndp->ndev);
+
+ return 0;
+}
+
+static const struct genl_ops ncsi_ops[] = {
+ {
+ .cmd = NCSI_CMD_PKG_INFO,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_pkg_info_nl,
+ .dumpit = ncsi_pkg_info_all_nl,
+ .flags = 0,
+ },
+ {
+ .cmd = NCSI_CMD_SET_INTERFACE,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_set_interface_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = NCSI_CMD_CLEAR_INTERFACE,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_clear_interface_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+static struct genl_family ncsi_genl_family __ro_after_init = {
+ .name = "NCSI",
+ .version = 0,
+ .maxattr = NCSI_ATTR_MAX,
+ .module = THIS_MODULE,
+ .ops = ncsi_ops,
+ .n_ops = ARRAY_SIZE(ncsi_ops),
+};
+
+int ncsi_init_netlink(struct net_device *dev)
+{
+ int rc;
+
+ rc = genl_register_family(&ncsi_genl_family);
+ if (rc)
+ netdev_err(dev, "ncsi: failed to register netlink family\n");
+
+ return rc;
+}
+
+int ncsi_unregister_netlink(struct net_device *dev)
+{
+ int rc;
+
+ rc = genl_unregister_family(&ncsi_genl_family);
+ if (rc)
+ netdev_err(dev, "ncsi: failed to unregister netlink family\n");
+
+ return rc;
+}
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
new file mode 100644
index 000000000000..91a5c256f8c4
--- /dev/null
+++ b/net/ncsi/ncsi-netlink.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NCSI_NETLINK_H__
+#define __NCSI_NETLINK_H__
+
+#include <linux/netdevice.h>
+
+#include "internal.h"
+
+int ncsi_init_netlink(struct net_device *dev);
+int ncsi_unregister_netlink(struct net_device *dev);
+
+#endif /* __NCSI_NETLINK_H__ */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d3220b43c832..704b3832dbad 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -465,12 +465,12 @@ config NF_TABLES_INET
depends on IPV6
select NF_TABLES_IPV4
select NF_TABLES_IPV6
- tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support"
+ bool "Netfilter nf_tables mixed IPv4/IPv6 tables support"
help
This option enables support for a mixed IPv4/IPv6 "inet" table.
config NF_TABLES_NETDEV
- tristate "Netfilter nf_tables netdev tables support"
+ bool "Netfilter nf_tables netdev tables support"
help
This option enables support for the "netdev" table.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5d9b8b959e58..fd32bd2c9521 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -73,13 +73,12 @@ obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o
obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
# nf_tables
-nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \
- nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \
- nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o
+nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
+ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
+ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
+ nft_dynset.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
-obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o
-obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 975a85a48d39..bc4bd247bb7d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -2094,7 +2094,7 @@ static struct pernet_operations ip_set_net_ops = {
.init = ip_set_net_init,
.exit = ip_set_net_exit,
.id = &ip_set_net_id,
- .size = sizeof(struct ip_set_net)
+ .size = sizeof(struct ip_set_net),
};
static int __init
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 8f004edad396..f9d5a2a1e3d0 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -72,9 +72,6 @@ hash_mac4_data_next(struct hash_mac4_elem *next,
#define IP_SET_PROTO_UNDEF
#include "ip_set_hash_gen.h"
-/* Zero valued element is not supported */
-static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
-
static int
hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -93,7 +90,7 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
return -EINVAL;
ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
- if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ if (is_zero_ether_addr(e.ether))
return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -118,7 +115,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]));
- if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ if (is_zero_ether_addr(e.ether))
return -IPSET_ERR_HASH_ELEM;
return adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 3e17d32b629d..58d5d05aec24 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -260,7 +260,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
buf_len = strlen(buf);
ct = nf_ct_get(skb, &ctinfo);
- if (ct && (ct->status & IPS_NAT_MASK)) {
+ if (ct) {
bool mangled;
/* If mangling fails this function will return 0
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index d625179de485..3057e453bf31 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
int i;
spin_lock_bh(&svc->sched_lock);
- tbl->dead = 1;
+ tbl->dead = true;
for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblc_del(en);
@@ -369,7 +369,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
- tbl->dead = 0;
+ tbl->dead = false;
tbl->svc = svc;
/*
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 84c57b62a588..92adc04557ed 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -404,7 +404,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
struct hlist_node *next;
spin_lock_bh(&svc->sched_lock);
- tbl->dead = 1;
+ tbl->dead = true;
for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
@@ -532,7 +532,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
- tbl->dead = 0;
+ tbl->dead = false;
tbl->svc = svc;
/*
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 6d65389e308f..153e690e2893 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -104,7 +104,7 @@ static unsigned int check_hlist(struct net *net,
struct nf_conn *found_ct;
unsigned int length = 0;
- *addit = true;
+ *addit = tuple ? true : false;
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
@@ -117,7 +117,7 @@ static unsigned int check_hlist(struct net *net,
found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
+ if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple)) {
/*
* Just to be sure we have it only once in the list.
* We should not see tuples twice unless someone hooks
@@ -158,7 +158,6 @@ static void tree_nodes_free(struct rb_root *root,
static unsigned int
count_tree(struct net *net, struct rb_root *root,
const u32 *key, u8 keylen,
- u8 family,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
@@ -221,6 +220,9 @@ count_tree(struct net *net, struct rb_root *root,
goto restart;
}
+ if (!tuple)
+ return 0;
+
/* no match, need to insert new node */
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
if (rbconn == NULL)
@@ -243,10 +245,12 @@ count_tree(struct net *net, struct rb_root *root,
return 1;
}
+/* Count and return number of conntrack entries in 'net' with particular 'key'.
+ * If 'tuple' is not null, insert it into the accounting data structure.
+ */
unsigned int nf_conncount_count(struct net *net,
struct nf_conncount_data *data,
const u32 *key,
- unsigned int family,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
@@ -259,7 +263,7 @@ unsigned int nf_conncount_count(struct net *net,
spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
- count = count_tree(net, root, key, data->keylen, family, tuple, zone);
+ count = count_tree(net, root, key, data->keylen, tuple, zone);
spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 866916712905..1d66de5151b2 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -8,6 +8,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/netfilter.h>
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -80,7 +82,7 @@ static int nf_conntrack_acct_init_sysctl(struct net *net)
net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter",
table);
if (!net->ct.acct_sysctl_header) {
- printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
+ pr_err("can't register to sysctl\n");
goto out_register;
}
return 0;
@@ -125,7 +127,7 @@ int nf_conntrack_acct_init(void)
{
int ret = nf_ct_extend_register(&acct_extend);
if (ret < 0)
- pr_err("nf_conntrack_acct: Unable to register extension\n");
+ pr_err("Unable to register extension\n");
return ret;
}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index ecc3ab784633..a1086bdec242 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -20,7 +20,6 @@
#include <net/netfilter/nf_conntrack_expect.h>
int nf_conntrack_broadcast_help(struct sk_buff *skb,
- unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int timeout)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 705198de671d..41ff04ee2554 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1763,14 +1763,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
{
struct net *net;
- rtnl_lock();
+ down_read(&net_rwsem);
for_each_net(net) {
if (atomic_read(&net->ct.count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
nf_queue_nf_hook_drop(net);
}
- rtnl_unlock();
+ up_read(&net_rwsem);
/* Need to wait for netns cleanup worker to finish, if its
* running -- it might have deleted a net namespace from
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index caac41ad9483..c11822a7d2bf 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -11,6 +11,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
@@ -372,7 +374,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
net->ct.event_sysctl_header =
register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.event_sysctl_header) {
- printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
+ pr_err("can't register to sysctl\n");
goto out_register;
}
return 0;
@@ -419,7 +421,7 @@ int nf_conntrack_ecache_init(void)
{
int ret = nf_ct_extend_register(&event_extend);
if (ret < 0)
- pr_err("nf_ct_event: Unable to register event extension.\n");
+ pr_err("Unable to register event extension\n");
BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 496ce173f0c1..bac5848f1c8e 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -33,7 +33,7 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
MODULE_ALIAS_NFCT_HELPER("netbios_ns");
static unsigned int timeout __read_mostly = 3;
-module_param(timeout, uint, S_IRUSR);
+module_param(timeout, uint, 0400);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
static struct nf_conntrack_expect_policy exp_policy = {
@@ -41,9 +41,10 @@ static struct nf_conntrack_expect_policy exp_policy = {
};
static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
{
- return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+ return nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout);
}
static struct nf_conntrack_helper helper __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dd177ebee9aa..4c1d0c5bc268 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -440,6 +440,31 @@ err:
return -1;
}
+static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct)
+{
+ struct nf_conn_synproxy *synproxy = nfct_synproxy(ct);
+ struct nlattr *nest_parms;
+
+ if (!synproxy)
+ return 0;
+
+ nest_parms = nla_nest_start(skb, CTA_SYNPROXY | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, CTA_SYNPROXY_ISN, htonl(synproxy->isn)) ||
+ nla_put_be32(skb, CTA_SYNPROXY_ITS, htonl(synproxy->its)) ||
+ nla_put_be32(skb, CTA_SYNPROXY_TSOFF, htonl(synproxy->tsoff)))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
{
if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
@@ -518,7 +543,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
ctnetlink_dump_master(skb, ct) < 0 ||
- ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
+ ctnetlink_dump_ct_synproxy(skb, ct) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -730,6 +756,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
if (events & (1 << IPCT_SEQADJ) &&
ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
+
+ if (events & (1 << IPCT_SYNPROXY) &&
+ ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ goto nla_put_failure;
}
#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -1497,9 +1527,8 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
if (ret < 0)
return ret;
- ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC,
- cda[CTA_NAT_SRC]);
- return ret;
+ return ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC,
+ cda[CTA_NAT_SRC]);
#else
if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
return 0;
@@ -1689,6 +1718,39 @@ err:
return ret;
}
+static const struct nla_policy synproxy_policy[CTA_SYNPROXY_MAX + 1] = {
+ [CTA_SYNPROXY_ISN] = { .type = NLA_U32 },
+ [CTA_SYNPROXY_ITS] = { .type = NLA_U32 },
+ [CTA_SYNPROXY_TSOFF] = { .type = NLA_U32 },
+};
+
+static int ctnetlink_change_synproxy(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ struct nf_conn_synproxy *synproxy = nfct_synproxy(ct);
+ struct nlattr *tb[CTA_SYNPROXY_MAX + 1];
+ int err;
+
+ if (!synproxy)
+ return 0;
+
+ err = nla_parse_nested(tb, CTA_SYNPROXY_MAX, cda[CTA_SYNPROXY],
+ synproxy_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[CTA_SYNPROXY_ISN] ||
+ !tb[CTA_SYNPROXY_ITS] ||
+ !tb[CTA_SYNPROXY_TSOFF])
+ return -EINVAL;
+
+ synproxy->isn = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ISN]));
+ synproxy->its = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ITS]));
+ synproxy->tsoff = ntohl(nla_get_be32(tb[CTA_SYNPROXY_TSOFF]));
+
+ return 0;
+}
+
static int
ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
{
@@ -1759,6 +1821,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
return err;
}
+ if (cda[CTA_SYNPROXY]) {
+ err = ctnetlink_change_synproxy(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
if (cda[CTA_LABELS]) {
err = ctnetlink_attach_labels(ct, cda);
if (err < 0)
@@ -1880,6 +1948,12 @@ ctnetlink_create_conntrack(struct net *net,
goto err2;
}
+ if (cda[CTA_SYNPROXY]) {
+ err = ctnetlink_change_synproxy(ct, cda);
+ if (err < 0)
+ goto err2;
+ }
+
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
@@ -1991,7 +2065,9 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
(1 << IPCT_HELPER) |
(1 << IPCT_PROTOINFO) |
(1 << IPCT_SEQADJ) |
- (1 << IPCT_MARK) | events,
+ (1 << IPCT_MARK) |
+ (1 << IPCT_SYNPROXY) |
+ events,
ct, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nf_ct_put(ct);
@@ -2012,7 +2088,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
(1 << IPCT_LABEL) |
(1 << IPCT_PROTOINFO) |
(1 << IPCT_SEQADJ) |
- (1 << IPCT_MARK),
+ (1 << IPCT_MARK) |
+ (1 << IPCT_SYNPROXY),
ct, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
}
@@ -2282,6 +2359,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ goto nla_put_failure;
+
#ifdef CONFIG_NF_CONNTRACK_MARK
if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
index 87b95a2c270c..b8e0a22ca1a9 100644
--- a/net/netfilter/nf_conntrack_snmp.c
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -26,7 +26,7 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_NFCT_HELPER("snmp");
static unsigned int timeout __read_mostly = 30;
-module_param(timeout, uint, S_IRUSR);
+module_param(timeout, uint, 0400);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
int (*nf_nat_snmp_hook)(struct sk_buff *skb,
@@ -36,11 +36,12 @@ int (*nf_nat_snmp_hook)(struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
{
typeof(nf_nat_snmp_hook) nf_nat_snmp;
- nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+ nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout);
nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9123fdec5e14..037fec54c850 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -495,7 +495,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net)
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(pde, root_uid, root_gid);
- pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
+ pde = proc_create("nf_conntrack", 0444, net->proc_net_stat,
&ct_cpu_seq_fops);
if (!pde)
goto out_stat_nf_conntrack;
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index 4c4734b78318..56766cb26e40 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -6,6 +6,8 @@
* published by the Free Software Foundation (or any later at your option).
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/netfilter.h>
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -58,7 +60,7 @@ static int nf_conntrack_tstamp_init_sysctl(struct net *net)
net->ct.tstamp_sysctl_header = register_net_sysctl(net, "net/netfilter",
table);
if (!net->ct.tstamp_sysctl_header) {
- printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
+ pr_err("can't register to sysctl\n");
goto out_register;
}
return 0;
@@ -104,7 +106,7 @@ int nf_conntrack_tstamp_init(void)
int ret;
ret = nf_ct_extend_register(&tstamp_extend);
if (ret < 0)
- pr_err("nf_ct_tstamp: Unable to register extension\n");
+ pr_err("Unable to register extension\n");
return ret;
}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index c2c1b16b7538..6d0357817cda 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -549,7 +549,7 @@ static int __net_init nf_log_net_init(struct net *net)
int ret = -ENOMEM;
#ifdef CONFIG_PROC_FS
- if (!proc_create("nf_log", S_IRUGO,
+ if (!proc_create("nf_log", 0444,
net->nf.proc_netfilter, &nflog_file_ops))
return ret;
#endif
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6c38421e31f9..617693ff9f4c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -8,6 +8,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/types.h>
#include <linux/timer.h>
@@ -814,7 +816,7 @@ static int __init nf_nat_init(void)
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
- printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
+ pr_err("Unable to register extension\n");
return ret;
}
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index d76afafdc699..5063cbf1689c 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -8,6 +8,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/inet.h>
@@ -71,7 +73,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];
unsigned int buflen;
- pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
+ pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen);
/* Connection will come from wherever this packet goes, hence !dir */
newaddr = ct->tuplehash[!dir].tuple.dst.u3;
@@ -136,8 +138,7 @@ static int __init nf_nat_ftp_init(void)
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
static int warn_set(const char *val, const struct kernel_param *kp)
{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ pr_info("kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
return 0;
}
module_param_call(ports, warn_set, NULL, NULL, 0);
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index dcb5f6375d9d..3aa35a43100d 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -10,6 +10,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/tcp.h>
@@ -79,7 +81,7 @@ static unsigned int help(struct sk_buff *skb,
*/
/* AAA = "us", ie. where server normally talks to. */
snprintf(buffer, sizeof(buffer), "%u %u", ntohl(newaddr.ip), port);
- pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n",
+ pr_debug("inserting '%s' == %pI4, port %u\n",
buffer, &newaddr.ip, port);
if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
@@ -108,8 +110,7 @@ static int __init nf_nat_irc_init(void)
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
static int warn_set(const char *val, const struct kernel_param *kp)
{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ pr_info("kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
return 0;
}
module_param_call(ports, warn_set, NULL, NULL, 0);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 92139a087260..6039b350abbe 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -325,7 +325,7 @@ static const struct file_operations synproxy_cpu_seq_fops = {
static int __net_init synproxy_proc_init(struct net *net)
{
- if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat,
+ if (!proc_create("synproxy", 0444, net->proc_net_stat,
&synproxy_cpu_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8b9fe30de0cd..9134cc429ad4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -74,15 +74,77 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
+/* removal requests are queued in the commit_list, but not acted upon
+ * until after all new rules are in place.
+ *
+ * Therefore, nf_register_net_hook(net, &nat_hook) runs before pending
+ * nf_unregister_net_hook().
+ *
+ * nf_register_net_hook thus fails if a nat hook is already in place
+ * even if the conflicting hook is about to be removed.
+ *
+ * If collision is detected, search commit_log for DELCHAIN matching
+ * the new nat hooknum; if we find one collision is temporary:
+ *
+ * Either transaction is aborted (new/colliding hook is removed), or
+ * transaction is committed (old hook is removed).
+ */
+static bool nf_tables_allow_nat_conflict(const struct net *net,
+ const struct nf_hook_ops *ops)
+{
+ const struct nft_trans *trans;
+ bool ret = false;
+
+ if (!ops->nat_hook)
+ return false;
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ const struct nf_hook_ops *pending_ops;
+ const struct nft_chain *pending;
+
+ if (trans->msg_type != NFT_MSG_NEWCHAIN &&
+ trans->msg_type != NFT_MSG_DELCHAIN)
+ continue;
+
+ pending = trans->ctx.chain;
+ if (!nft_is_base_chain(pending))
+ continue;
+
+ pending_ops = &nft_base_chain(pending)->ops;
+ if (pending_ops->nat_hook &&
+ pending_ops->pf == ops->pf &&
+ pending_ops->hooknum == ops->hooknum) {
+ /* other hook registration already pending? */
+ if (trans->msg_type == NFT_MSG_NEWCHAIN)
+ return false;
+
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
+ struct nf_hook_ops *ops;
+ int ret;
+
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return 0;
- return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+ ops = &nft_base_chain(chain)->ops;
+ ret = nf_register_net_hook(net, ops);
+ if (ret == -EBUSY && nf_tables_allow_nat_conflict(net, ops)) {
+ ops->nat_hook = false;
+ ret = nf_register_net_hook(net, ops);
+ ops->nat_hook = true;
+ }
+
+ return ret;
}
static void nf_tables_unregister_hook(struct net *net,
@@ -384,9 +446,9 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
return ++table->hgenerator;
}
-static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
+static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
-static const struct nf_chain_type *
+static const struct nft_chain_type *
__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
{
int i;
@@ -399,10 +461,10 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
return NULL;
}
-static const struct nf_chain_type *
+static const struct nft_chain_type *
nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
{
- const struct nf_chain_type *type;
+ const struct nft_chain_type *type;
type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
@@ -859,26 +921,22 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
kfree(ctx->table);
}
-int nft_register_chain_type(const struct nf_chain_type *ctype)
+void nft_register_chain_type(const struct nft_chain_type *ctype)
{
- int err = 0;
-
if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO))
- return -EINVAL;
+ return;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- if (chain_type[ctype->family][ctype->type] != NULL) {
- err = -EBUSY;
- goto out;
+ if (WARN_ON(chain_type[ctype->family][ctype->type] != NULL)) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return;
}
chain_type[ctype->family][ctype->type] = ctype;
-out:
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- return err;
}
EXPORT_SYMBOL_GPL(nft_register_chain_type);
-void nft_unregister_chain_type(const struct nf_chain_type *ctype)
+void nft_unregister_chain_type(const struct nft_chain_type *ctype)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
chain_type[ctype->family][ctype->type] = NULL;
@@ -1215,19 +1273,21 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
rcu_assign_pointer(chain->stats, newstats);
}
-static void nf_tables_chain_destroy(struct nft_chain *chain)
+static void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
+ struct nft_chain *chain = ctx->chain;
+
BUG_ON(chain->use > 0);
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
+ if (basechain->type->free)
+ basechain->type->free(ctx);
module_put(basechain->type->owner);
free_percpu(basechain->stats);
if (basechain->stats)
static_branch_dec(&nft_counters_enabled);
- if (basechain->ops.dev != NULL)
- dev_put(basechain->ops.dev);
kfree(chain->name);
kfree(basechain);
} else {
@@ -1239,7 +1299,7 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
struct nft_chain_hook {
u32 num;
s32 priority;
- const struct nf_chain_type *type;
+ const struct nft_chain_type *type;
struct net_device *dev;
};
@@ -1249,7 +1309,7 @@ static int nft_chain_parse_hook(struct net *net,
bool create)
{
struct nlattr *ha[NFTA_HOOK_MAX + 1];
- const struct nf_chain_type *type;
+ const struct nft_chain_type *type;
struct net_device *dev;
int err;
@@ -1294,7 +1354,7 @@ static int nft_chain_parse_hook(struct net *net,
}
nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
- dev = dev_get_by_name(net, ifname);
+ dev = __dev_get_by_name(net, ifname);
if (!dev) {
module_put(type->owner);
return -ENOENT;
@@ -1311,8 +1371,6 @@ static int nft_chain_parse_hook(struct net *net,
static void nft_chain_release_hook(struct nft_chain_hook *hook)
{
module_put(hook->type->owner);
- if (hook->dev != NULL)
- dev_put(hook->dev);
}
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
@@ -1358,6 +1416,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
}
basechain->type = hook.type;
+ if (basechain->type->init)
+ basechain->type->init(ctx);
+
chain = &basechain->chain;
ops = &basechain->ops;
@@ -1378,6 +1439,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (chain == NULL)
return -ENOMEM;
}
+ ctx->chain = chain;
+
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
@@ -1391,7 +1454,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
goto err1;
- ctx->chain = chain;
err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
@@ -1403,7 +1465,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
err2:
nf_tables_unregister_hook(net, table, chain);
err1:
- nf_tables_chain_destroy(chain);
+ nf_tables_chain_destroy(ctx);
return err;
}
@@ -1911,6 +1973,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
+ [NFTA_RULE_ID] = { .type = NLA_U32 },
};
static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
@@ -2446,6 +2509,9 @@ EXPORT_SYMBOL_GPL(nft_unregister_set);
static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags)
{
+ if ((flags & NFT_SET_EVAL) && !ops->update)
+ return false;
+
return (flags & ops->features) == (flags & NFT_SET_FEATURES);
}
@@ -2510,7 +2576,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
if (est.space == best.space &&
est.lookup < best.lookup)
break;
- } else if (est.size < best.size) {
+ } else if (est.size < best.size || !bops) {
break;
}
continue;
@@ -2629,11 +2695,11 @@ static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
return ERR_PTR(-ENOENT);
}
-struct nft_set *nft_set_lookup(const struct net *net,
- const struct nft_table *table,
- const struct nlattr *nla_set_name,
- const struct nlattr *nla_set_id,
- u8 genmask)
+struct nft_set *nft_set_lookup_global(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla_set_name,
+ const struct nlattr *nla_set_id,
+ u8 genmask)
{
struct nft_set *set;
@@ -2646,7 +2712,7 @@ struct nft_set *nft_set_lookup(const struct net *net,
}
return set;
}
-EXPORT_SYMBOL_GPL(nft_set_lookup);
+EXPORT_SYMBOL_GPL(nft_set_lookup_global);
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
@@ -3315,6 +3381,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 },
[NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
+ [NFTA_SET_ELEM_EXPR] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING },
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -4028,17 +4096,10 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
- genmask);
- if (IS_ERR(set)) {
- if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
- set = nf_tables_set_lookup_byid(net,
- nla[NFTA_SET_ELEM_LIST_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup_global(net, ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
@@ -4328,9 +4389,9 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
}
EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
-struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
- const struct nlattr *nla,
- u32 objtype, u8 genmask)
+static struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla,
+ u32 objtype, u8 genmask)
{
struct nft_object *obj;
@@ -4357,16 +4418,20 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
const struct nft_object_type *type,
const struct nlattr *attr)
{
- struct nlattr *tb[type->maxattr + 1];
+ struct nlattr **tb;
const struct nft_object_ops *ops;
struct nft_object *obj;
- int err;
+ int err = -ENOMEM;
+
+ tb = kmalloc_array(type->maxattr + 1, sizeof(*tb), GFP_KERNEL);
+ if (!tb)
+ goto err1;
if (attr) {
err = nla_parse_nested(tb, type->maxattr, attr, type->policy,
NULL);
if (err < 0)
- goto err1;
+ goto err2;
} else {
memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
}
@@ -4375,7 +4440,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
if (IS_ERR(ops)) {
err = PTR_ERR(ops);
- goto err1;
+ goto err2;
}
} else {
ops = type->ops;
@@ -4383,18 +4448,21 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
err = -ENOMEM;
obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
- if (obj == NULL)
- goto err1;
+ if (!obj)
+ goto err2;
err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
- goto err2;
+ goto err3;
obj->ops = ops;
+ kfree(tb);
return obj;
-err2:
+err3:
kfree(obj);
+err2:
+ kfree(tb);
err1:
return ERR_PTR(err);
}
@@ -4850,7 +4918,7 @@ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
}
EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
-struct nft_flowtable *
+static struct nft_flowtable *
nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
@@ -4864,8 +4932,6 @@ nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
-#define NFT_FLOWTABLE_DEVICE_MAX 8
-
static int nf_tables_parse_devices(const struct nft_ctx *ctx,
const struct nlattr *attr,
struct net_device *dev_array[], int *len)
@@ -4882,7 +4948,7 @@ static int nf_tables_parse_devices(const struct nft_ctx *ctx,
}
nla_strlcpy(ifname, tmp, IFNAMSIZ);
- dev = dev_get_by_name(ctx->net, ifname);
+ dev = __dev_get_by_name(ctx->net, ifname);
if (!dev) {
err = -ENOENT;
goto err1;
@@ -4938,13 +5004,11 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
dev_array, &n);
if (err < 0)
- goto err1;
+ return err;
ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
- if (!ops) {
- err = -ENOMEM;
- goto err1;
- }
+ if (!ops)
+ return -ENOMEM;
flowtable->hooknum = hooknum;
flowtable->priority = priority;
@@ -4958,13 +5022,10 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
flowtable->ops[i].priv = &flowtable->data.rhashtable;
flowtable->ops[i].hook = flowtable->data.type->hook;
flowtable->ops[i].dev = dev_array[i];
+ flowtable->dev_name[i] = kstrdup(dev_array[i]->name,
+ GFP_KERNEL);
}
- err = 0;
-err1:
- for (i = 0; i < n; i++)
- dev_put(dev_array[i]);
-
return err;
}
@@ -5037,9 +5098,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nf_flowtable_type *type;
+ struct nft_flowtable *flowtable, *ft;
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct nft_flowtable *flowtable;
struct nft_table *table;
struct nft_ctx ctx;
int err, i, k;
@@ -5099,6 +5160,22 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err3;
for (i = 0; i < flowtable->ops_len; i++) {
+ if (!flowtable->ops[i].dev)
+ continue;
+
+ list_for_each_entry(ft, &table->flowtables, list) {
+ for (k = 0; k < ft->ops_len; k++) {
+ if (!ft->ops[k].dev)
+ continue;
+
+ if (flowtable->ops[i].dev == ft->ops[k].dev &&
+ flowtable->ops[i].pf == ft->ops[k].pf) {
+ err = -EBUSY;
+ goto err4;
+ }
+ }
+ }
+
err = nf_register_net_hook(net, &flowtable->ops[i]);
if (err < 0)
goto err4;
@@ -5119,8 +5196,10 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
err5:
i = flowtable->ops_len;
err4:
- for (k = i - 1; k >= 0; k--)
- nf_unregister_net_hook(net, &flowtable->ops[i]);
+ for (k = i - 1; k >= 0; k--) {
+ kfree(flowtable->dev_name[k]);
+ nf_unregister_net_hook(net, &flowtable->ops[k]);
+ }
kfree(flowtable->ops);
err3:
@@ -5145,6 +5224,11 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_ctx ctx;
+ if (!nla[NFTA_FLOWTABLE_TABLE] ||
+ (!nla[NFTA_FLOWTABLE_NAME] &&
+ !nla[NFTA_FLOWTABLE_HANDLE]))
+ return -EINVAL;
+
table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
family, genmask);
if (IS_ERR(table))
@@ -5205,9 +5289,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
for (i = 0; i < flowtable->ops_len; i++) {
- if (flowtable->ops[i].dev &&
+ if (flowtable->dev_name[i][0] &&
nla_put_string(skb, NFTA_DEVICE_NAME,
- flowtable->ops[i].dev->name))
+ flowtable->dev_name[i]))
goto nla_put_failure;
}
nla_nest_end(skb, nest_devs);
@@ -5402,6 +5486,7 @@ err:
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
cancel_delayed_work_sync(&flowtable->data.gc_work);
+ kfree(flowtable->ops);
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
rhashtable_destroy(&flowtable->data.rhashtable);
@@ -5448,6 +5533,7 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev,
continue;
nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
+ flowtable->dev_name[i][0] = '\0';
flowtable->ops[i].dev = NULL;
break;
}
@@ -5675,7 +5761,7 @@ static void nf_tables_commit_release(struct nft_trans *trans)
nf_tables_table_destroy(&trans->ctx);
break;
case NFT_MSG_DELCHAIN:
- nf_tables_chain_destroy(trans->ctx.chain);
+ nf_tables_chain_destroy(&trans->ctx);
break;
case NFT_MSG_DELRULE:
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
@@ -5846,7 +5932,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nf_tables_table_destroy(&trans->ctx);
break;
case NFT_MSG_NEWCHAIN:
- nf_tables_chain_destroy(trans->ctx.chain);
+ nf_tables_chain_destroy(&trans->ctx);
break;
case NFT_MSG_NEWRULE:
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
@@ -5993,7 +6079,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
};
int nft_chain_validate_dependency(const struct nft_chain *chain,
- enum nft_chain_type type)
+ enum nft_chain_types type)
{
const struct nft_base_chain *basechain;
@@ -6496,7 +6582,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
}
list_del(&ctx->chain->list);
ctx->table->use--;
- nf_tables_chain_destroy(ctx->chain);
+ nf_tables_chain_destroy(ctx);
return 0;
}
@@ -6512,6 +6598,7 @@ static void __nft_release_tables(struct net *net)
struct nft_set *set, *ns;
struct nft_ctx ctx = {
.net = net,
+ .family = NFPROTO_NETDEV,
};
list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
@@ -6548,9 +6635,10 @@ static void __nft_release_tables(struct net *net)
nft_obj_destroy(obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
+ ctx.chain = chain;
list_del(&chain->list);
table->use--;
- nf_tables_chain_destroy(chain);
+ nf_tables_chain_destroy(&ctx);
}
list_del(&table->list);
nf_tables_table_destroy(&ctx);
@@ -6581,6 +6669,8 @@ static int __init nf_tables_module_init(void)
{
int err;
+ nft_chain_filter_init();
+
info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
GFP_KERNEL);
if (info == NULL) {
@@ -6615,6 +6705,7 @@ static void __exit nf_tables_module_exit(void)
rcu_barrier();
nf_tables_core_module_exit();
kfree(info);
+ nft_chain_filter_fini();
}
module_init(nf_tables_module_init);
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
deleted file mode 100644
index e30c7da09d0d..000000000000
--- a/net/netfilter/nf_tables_inet.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2012-2014 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-#include <net/ip.h>
-
-static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
-
- switch (state->pf) {
- case NFPROTO_IPV4:
- nft_set_pktinfo_ipv4(&pkt, skb);
- break;
- case NFPROTO_IPV6:
- nft_set_pktinfo_ipv6(&pkt, skb);
- break;
- default:
- break;
- }
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_inet = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_INET,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_FORWARD) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING),
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_inet,
- [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
- [NF_INET_FORWARD] = nft_do_chain_inet,
- [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
- [NF_INET_POST_ROUTING] = nft_do_chain_inet,
- },
-};
-
-static int __init nf_tables_inet_init(void)
-{
- return nft_register_chain_type(&filter_inet);
-}
-
-static void __exit nf_tables_inet_exit(void)
-{
- nft_unregister_chain_type(&filter_inet);
-}
-
-module_init(nf_tables_inet_init);
-module_exit(nf_tables_inet_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(1, "filter");
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
deleted file mode 100644
index 4041fafca934..000000000000
--- a/net/netfilter/nf_tables_netdev.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <net/netfilter/nf_tables.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int
-nft_do_chain_netdev(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb);
- break;
- case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb);
- break;
- default:
- nft_set_pktinfo_unspec(&pkt, skb);
- break;
- }
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type nft_filter_chain_netdev = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_NETDEV,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_NETDEV_INGRESS),
- .hooks = {
- [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
- },
-};
-
-static void nft_netdev_event(unsigned long event, struct net_device *dev,
- struct nft_ctx *ctx)
-{
- struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
-
- switch (event) {
- case NETDEV_UNREGISTER:
- if (strcmp(basechain->dev_name, dev->name) != 0)
- return;
-
- __nft_release_basechain(ctx);
- break;
- case NETDEV_CHANGENAME:
- if (dev->ifindex != basechain->ops.dev->ifindex)
- return;
-
- strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
- break;
- }
-}
-
-static int nf_tables_netdev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct nft_table *table;
- struct nft_chain *chain, *nr;
- struct nft_ctx ctx = {
- .net = dev_net(dev),
- };
-
- if (event != NETDEV_UNREGISTER &&
- event != NETDEV_CHANGENAME)
- return NOTIFY_DONE;
-
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_for_each_entry(table, &ctx.net->nft.tables, list) {
- if (table->family != NFPROTO_NETDEV)
- continue;
-
- ctx.family = table->family;
- ctx.table = table;
- list_for_each_entry_safe(chain, nr, &table->chains, list) {
- if (!nft_is_base_chain(chain))
- continue;
-
- ctx.chain = chain;
- nft_netdev_event(event, dev, &ctx);
- }
- }
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block nf_tables_netdev_notifier = {
- .notifier_call = nf_tables_netdev_event,
-};
-
-static int __init nf_tables_netdev_init(void)
-{
- int ret;
-
- ret = nft_register_chain_type(&nft_filter_chain_netdev);
- if (ret)
- return ret;
-
- ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
- if (ret)
- goto err_register_netdevice_notifier;
-
- return 0;
-
-err_register_netdevice_notifier:
- nft_unregister_chain_type(&nft_filter_chain_netdev);
-
- return ret;
-}
-
-static void __exit nf_tables_netdev_exit(void)
-{
- unregister_netdevice_notifier(&nf_tables_netdev_notifier);
- nft_unregister_chain_type(&nft_filter_chain_netdev);
-}
-
-module_init(nf_tables_netdev_init);
-module_exit(nf_tables_netdev_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 88d427f9f9e6..b9505bcd3827 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -467,8 +467,7 @@ static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
GFP_ATOMIC);
}
-int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
- struct nf_acct *nfacct)
+int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct)
{
u64 now;
u64 *quota;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index d33ce6d5ebce..4a4b293fb2e5 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -314,23 +314,30 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
struct nf_conntrack_helper *helper)
{
- struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+ struct nf_conntrack_expect_policy *new_policy;
struct nf_conntrack_expect_policy *policy;
- int i, err;
+ int i, ret = 0;
+
+ new_policy = kmalloc_array(helper->expect_class_max + 1,
+ sizeof(*new_policy), GFP_KERNEL);
+ if (!new_policy)
+ return -ENOMEM;
/* Check first that all policy attributes are well-formed, so we don't
* leave things in inconsistent state on errors.
*/
for (i = 0; i < helper->expect_class_max + 1; i++) {
- if (!tb[NFCTH_POLICY_SET + i])
- return -EINVAL;
+ if (!tb[NFCTH_POLICY_SET + i]) {
+ ret = -EINVAL;
+ goto err;
+ }
- err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+ ret = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
&new_policy[i],
tb[NFCTH_POLICY_SET + i]);
- if (err < 0)
- return err;
+ if (ret < 0)
+ goto err;
}
/* Now we can safely update them. */
for (i = 0; i < helper->expect_class_max + 1; i++) {
@@ -340,7 +347,9 @@ static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
policy->timeout = new_policy->timeout;
}
- return 0;
+err:
+ kfree(new_policy);
+ return ret;
}
static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 95b04702a655..9ee5fa551fa6 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -51,19 +51,27 @@ ctnl_timeout_parse_policy(void *timeouts,
const struct nf_conntrack_l4proto *l4proto,
struct net *net, const struct nlattr *attr)
{
+ struct nlattr **tb;
int ret = 0;
- if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
- struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
+ if (!l4proto->ctnl_timeout.nlattr_to_obj)
+ return 0;
- ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
- attr, l4proto->ctnl_timeout.nla_policy,
- NULL);
- if (ret < 0)
- return ret;
+ tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
+ GFP_KERNEL);
- ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
- }
+ if (!tb)
+ return -ENOMEM;
+
+ ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr,
+ l4proto->ctnl_timeout.nla_policy, NULL);
+ if (ret < 0)
+ goto err;
+
+ ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+
+err:
+ kfree(tb);
return ret;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8bba23160a68..74a04638ef03 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -14,6 +14,9 @@
* published by the Free Software Foundation.
*
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
@@ -833,11 +836,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
if (diff > skb_tailroom(e->skb)) {
nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
diff, GFP_ATOMIC);
- if (!nskb) {
- printk(KERN_WARNING "nf_queue: OOM "
- "in mangle, dropping packet\n");
+ if (!nskb)
return -ENOMEM;
- }
kfree_skb(e->skb);
e->skb = nskb;
}
@@ -1536,20 +1536,20 @@ static int __init nfnetlink_queue_init(void)
status = register_pernet_subsys(&nfnl_queue_net_ops);
if (status < 0) {
- pr_err("nf_queue: failed to register pernet ops\n");
+ pr_err("failed to register pernet ops\n");
goto out;
}
netlink_register_notifier(&nfqnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfqnl_subsys);
if (status < 0) {
- pr_err("nf_queue: failed to create netlink socket\n");
+ pr_err("failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
status = register_netdevice_notifier(&nfqnl_dev_notifier);
if (status < 0) {
- pr_err("nf_queue: failed to register netdevice notifier\n");
+ pr_err("failed to register netdevice notifier\n");
goto cleanup_netlink_subsys;
}
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
new file mode 100644
index 000000000000..84c902477a91
--- /dev/null
+++ b/net/netfilter/nft_chain_filter.c
@@ -0,0 +1,398 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+#ifdef CONFIG_NF_TABLES_IPV4
+static unsigned int nft_do_chain_ipv4(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_ipv4 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV4,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+};
+
+static void nft_chain_filter_ipv4_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_ipv4);
+}
+static void nft_chain_filter_ipv4_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_ipv4);
+}
+
+#else
+static inline void nft_chain_filter_ipv4_init(void) {}
+static inline void nft_chain_filter_ipv4_fini(void) {}
+#endif /* CONFIG_NF_TABLES_IPV4 */
+
+#ifdef CONFIG_NF_TABLES_ARP
+static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_arp = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_ARP,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT),
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ },
+};
+
+static void nft_chain_filter_arp_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_arp);
+}
+
+static void nft_chain_filter_arp_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_arp);
+}
+#else
+static inline void nft_chain_filter_arp_init(void) {}
+static inline void nft_chain_filter_arp_fini(void) {}
+#endif /* CONFIG_NF_TABLES_ARP */
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static unsigned int nft_do_chain_ipv6(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_ipv6 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV6,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
+};
+
+static void nft_chain_filter_ipv6_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_ipv6);
+}
+
+static void nft_chain_filter_ipv6_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_ipv6);
+}
+#else
+static inline void nft_chain_filter_ipv6_init(void) {}
+static inline void nft_chain_filter_ipv6_fini(void) {}
+#endif /* CONFIG_NF_TABLES_IPV6 */
+
+#ifdef CONFIG_NF_TABLES_INET
+static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (state->pf) {
+ case NFPROTO_IPV4:
+ nft_set_pktinfo_ipv4(&pkt, skb);
+ break;
+ case NFPROTO_IPV6:
+ nft_set_pktinfo_ipv6(&pkt, skb);
+ break;
+ default:
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_inet = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_INET,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
+ [NF_INET_FORWARD] = nft_do_chain_inet,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
+ },
+};
+
+static void nft_chain_filter_inet_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_inet);
+}
+
+static void nft_chain_filter_inet_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_inet);
+}
+#else
+static inline void nft_chain_filter_inet_init(void) {}
+static inline void nft_chain_filter_inet_fini(void) {}
+#endif /* CONFIG_NF_TABLES_IPV6 */
+
+#ifdef CONFIG_NF_TABLES_BRIDGE
+static unsigned int
+nft_do_chain_bridge(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
+ break;
+ case htons(ETH_P_IPV6):
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
+ break;
+ default:
+ nft_set_pktinfo_unspec(&pkt, skb);
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_bridge = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_BRIDGE,
+ .hook_mask = (1 << NF_BR_PRE_ROUTING) |
+ (1 << NF_BR_LOCAL_IN) |
+ (1 << NF_BR_FORWARD) |
+ (1 << NF_BR_LOCAL_OUT) |
+ (1 << NF_BR_POST_ROUTING),
+ .hooks = {
+ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
+ [NF_BR_FORWARD] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
+ [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
+ },
+};
+
+static void nft_chain_filter_bridge_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_bridge);
+}
+
+static void nft_chain_filter_bridge_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_bridge);
+}
+#else
+static inline void nft_chain_filter_bridge_init(void) {}
+static inline void nft_chain_filter_bridge_fini(void) {}
+#endif /* CONFIG_NF_TABLES_BRIDGE */
+
+#ifdef CONFIG_NF_TABLES_NETDEV
+static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
+ break;
+ case htons(ETH_P_IPV6):
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
+ break;
+ default:
+ nft_set_pktinfo_unspec(&pkt, skb);
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_netdev = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_NETDEV,
+ .hook_mask = (1 << NF_NETDEV_INGRESS),
+ .hooks = {
+ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ },
+};
+
+static void nft_netdev_event(unsigned long event, struct net_device *dev,
+ struct nft_ctx *ctx)
+{
+ struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ if (strcmp(basechain->dev_name, dev->name) != 0)
+ return;
+
+ __nft_release_basechain(ctx);
+ break;
+ case NETDEV_CHANGENAME:
+ if (dev->ifindex != basechain->ops.dev->ifindex)
+ return;
+
+ strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+ break;
+ }
+}
+
+static int nf_tables_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_table *table;
+ struct nft_chain *chain, *nr;
+ struct nft_ctx ctx = {
+ .net = dev_net(dev),
+ };
+
+ if (event != NETDEV_UNREGISTER &&
+ event != NETDEV_CHANGENAME)
+ return NOTIFY_DONE;
+
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_for_each_entry(table, &ctx.net->nft.tables, list) {
+ if (table->family != NFPROTO_NETDEV)
+ continue;
+
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry_safe(chain, nr, &table->chains, list) {
+ if (!nft_is_base_chain(chain))
+ continue;
+
+ ctx.chain = chain;
+ nft_netdev_event(event, dev, &ctx);
+ }
+ }
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_tables_netdev_notifier = {
+ .notifier_call = nf_tables_netdev_event,
+};
+
+static int nft_chain_filter_netdev_init(void)
+{
+ int err;
+
+ nft_register_chain_type(&nft_chain_filter_netdev);
+
+ err = register_netdevice_notifier(&nf_tables_netdev_notifier);
+ if (err)
+ goto err_register_netdevice_notifier;
+
+ return 0;
+
+err_register_netdevice_notifier:
+ nft_unregister_chain_type(&nft_chain_filter_netdev);
+
+ return err;
+}
+
+static void nft_chain_filter_netdev_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_netdev);
+ unregister_netdevice_notifier(&nf_tables_netdev_notifier);
+}
+#else
+static inline int nft_chain_filter_netdev_init(void) { return 0; }
+static inline void nft_chain_filter_netdev_fini(void) {}
+#endif /* CONFIG_NF_TABLES_NETDEV */
+
+int __init nft_chain_filter_init(void)
+{
+ int err;
+
+ err = nft_chain_filter_netdev_init();
+ if (err < 0)
+ return err;
+
+ nft_chain_filter_ipv4_init();
+ nft_chain_filter_ipv6_init();
+ nft_chain_filter_arp_init();
+ nft_chain_filter_inet_init();
+ nft_chain_filter_bridge_init();
+
+ return 0;
+}
+
+void __exit nft_chain_filter_fini(void)
+{
+ nft_chain_filter_bridge_fini();
+ nft_chain_filter_inet_fini();
+ nft_chain_filter_arp_fini();
+ nft_chain_filter_ipv6_fini();
+ nft_chain_filter_ipv4_fini();
+ nft_chain_filter_netdev_fini();
+}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 6ab274b14484..ea737fd789e8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -196,6 +196,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
case NFT_CT_PROTO_DST:
nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
return;
+ case NFT_CT_SRC_IP:
+ if (nf_ct_l3num(ct) != NFPROTO_IPV4)
+ goto err;
+ *dest = tuple->src.u3.ip;
+ return;
+ case NFT_CT_DST_IP:
+ if (nf_ct_l3num(ct) != NFPROTO_IPV4)
+ goto err;
+ *dest = tuple->dst.u3.ip;
+ return;
+ case NFT_CT_SRC_IP6:
+ if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+ goto err;
+ memcpy(dest, tuple->src.u3.ip6, sizeof(struct in6_addr));
+ return;
+ case NFT_CT_DST_IP6:
+ if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+ goto err;
+ memcpy(dest, tuple->dst.u3.ip6, sizeof(struct in6_addr));
+ return;
default:
break;
}
@@ -419,6 +439,20 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
return -EAFNOSUPPORT;
}
break;
+ case NFT_CT_SRC_IP:
+ case NFT_CT_DST_IP:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip);
+ break;
+ case NFT_CT_SRC_IP6:
+ case NFT_CT_DST_IP6:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip6);
+ break;
case NFT_CT_PROTO_SRC:
case NFT_CT_PROTO_DST:
if (tb[NFTA_CT_DIRECTION] == NULL)
@@ -588,6 +622,10 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
switch (priv->key) {
case NFT_CT_SRC:
case NFT_CT_DST:
+ case NFT_CT_SRC_IP:
+ case NFT_CT_DST_IP:
+ case NFT_CT_SRC_IP6:
+ case NFT_CT_DST_IP6:
case NFT_CT_PROTO_SRC:
case NFT_CT_PROTO_DST:
if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index fc83e29d6634..04863fad05dd 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -132,8 +132,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
priv->invert = true;
}
- set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME],
- tb[NFTA_DYNSET_SET_ID], genmask);
+ set = nft_set_lookup_global(ctx->net, ctx->table,
+ tb[NFTA_DYNSET_SET_NAME],
+ tb[NFTA_DYNSET_SET_ID], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 475570e89ede..f52da5e2199f 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -71,8 +71,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
tb[NFTA_LOOKUP_SREG] == NULL)
return -EINVAL;
- set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
- tb[NFTA_LOOKUP_SET_ID], genmask);
+ set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
+ tb[NFTA_LOOKUP_SET_ID], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 7bcdc48f3d73..0b02407773ad 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -117,8 +117,9 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
struct nft_set *set;
int err;
- set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME],
- tb[NFTA_OBJREF_SET_ID], genmask);
+ set = nft_set_lookup_global(ctx->net, ctx->table,
+ tb[NFTA_OBJREF_SET_NAME],
+ tb[NFTA_OBJREF_SET_ID], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 3f1624ee056f..fc9c6d5d64cd 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -674,7 +674,7 @@ static const struct nft_set_ops *
nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
u32 flags)
{
- if (desc->size) {
+ if (desc->size && !(flags & (NFT_SET_EVAL | NFT_SET_TIMEOUT))) {
switch (desc->klen) {
case 4:
return &nft_hash_fast_ops;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index fa1655aff8d3..71325fef647d 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
#define XT_PCPU_BLOCK_SIZE 4096
+#define XT_MAX_TABLE_SIZE (512 * 1024 * 1024)
struct compat_delta {
unsigned int offset; /* offset in kernel */
@@ -423,6 +424,36 @@ textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
return buf;
}
+/**
+ * xt_check_proc_name - check that name is suitable for /proc file creation
+ *
+ * @name: file name candidate
+ * @size: length of buffer
+ *
+ * some x_tables modules wish to create a file in /proc.
+ * This function makes sure that the name is suitable for this
+ * purpose, it checks that name is NUL terminated and isn't a 'special'
+ * name, like "..".
+ *
+ * returns negative number on error or 0 if name is useable.
+ */
+int xt_check_proc_name(const char *name, unsigned int size)
+{
+ if (name[0] == '\0')
+ return -EINVAL;
+
+ if (strnlen(name, size) == size)
+ return -ENAMETOOLONG;
+
+ if (strcmp(name, ".") == 0 ||
+ strcmp(name, "..") == 0 ||
+ strchr(name, '/'))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(xt_check_proc_name);
+
int xt_check_match(struct xt_mtchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
@@ -518,19 +549,104 @@ static int xt_check_entry_match(const char *match, const char *target,
return 0;
}
+/** xt_check_table_hooks - check hook entry points are sane
+ *
+ * @info xt_table_info to check
+ * @valid_hooks - hook entry points that we can enter from
+ *
+ * Validates that the hook entry and underflows points are set up.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks)
+{
+ const char *err = "unsorted underflow";
+ unsigned int i, max_uflow, max_entry;
+ bool check_hooks = false;
+
+ BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow));
+
+ max_entry = 0;
+ max_uflow = 0;
+
+ for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) {
+ if (!(valid_hooks & (1 << i)))
+ continue;
+
+ if (info->hook_entry[i] == 0xFFFFFFFF)
+ return -EINVAL;
+ if (info->underflow[i] == 0xFFFFFFFF)
+ return -EINVAL;
+
+ if (check_hooks) {
+ if (max_uflow > info->underflow[i])
+ goto error;
+
+ if (max_uflow == info->underflow[i]) {
+ err = "duplicate underflow";
+ goto error;
+ }
+ if (max_entry > info->hook_entry[i]) {
+ err = "unsorted entry";
+ goto error;
+ }
+ if (max_entry == info->hook_entry[i]) {
+ err = "duplicate entry";
+ goto error;
+ }
+ }
+ max_entry = info->hook_entry[i];
+ max_uflow = info->underflow[i];
+ check_hooks = true;
+ }
+
+ return 0;
+error:
+ pr_err_ratelimited("%s at hook %d\n", err, i);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(xt_check_table_hooks);
+
+static bool verdict_ok(int verdict)
+{
+ if (verdict > 0)
+ return true;
+
+ if (verdict < 0) {
+ int v = -verdict - 1;
+
+ if (verdict == XT_RETURN)
+ return true;
+
+ switch (v) {
+ case NF_ACCEPT: return true;
+ case NF_DROP: return true;
+ case NF_QUEUE: return true;
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ return false;
+}
+
+static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
+ const char *msg, unsigned int msglen)
+{
+ return usersize == kernsize && strnlen(msg, msglen) < msglen;
+}
+
#ifdef CONFIG_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
struct xt_af *xp = &xt[af];
- if (!xp->compat_tab) {
- if (!xp->number)
- return -EINVAL;
- xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
- if (!xp->compat_tab)
- return -ENOMEM;
- xp->cur = 0;
- }
+ WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
+ if (WARN_ON(!xp->compat_tab))
+ return -ENOMEM;
if (xp->cur >= xp->number)
return -EINVAL;
@@ -546,6 +662,8 @@ EXPORT_SYMBOL_GPL(xt_compat_add_offset);
void xt_compat_flush_offsets(u_int8_t af)
{
+ WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
if (xt[af].compat_tab) {
vfree(xt[af].compat_tab);
xt[af].compat_tab = NULL;
@@ -573,10 +691,30 @@ int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
}
EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
-void xt_compat_init_offsets(u_int8_t af, unsigned int number)
+int xt_compat_init_offsets(u8 af, unsigned int number)
{
+ size_t mem;
+
+ WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
+ if (!number || number > (INT_MAX / sizeof(struct compat_delta)))
+ return -EINVAL;
+
+ if (WARN_ON(xt[af].compat_tab))
+ return -EINVAL;
+
+ mem = sizeof(struct compat_delta) * number;
+ if (mem > XT_MAX_TABLE_SIZE)
+ return -ENOMEM;
+
+ xt[af].compat_tab = vmalloc(mem);
+ if (!xt[af].compat_tab)
+ return -ENOMEM;
+
xt[af].number = number;
xt[af].cur = 0;
+
+ return 0;
}
EXPORT_SYMBOL(xt_compat_init_offsets);
@@ -654,6 +792,11 @@ struct compat_xt_standard_target {
compat_uint_t verdict;
};
+struct compat_xt_error_target {
+ struct compat_xt_entry_target t;
+ char errorname[XT_FUNCTION_MAXNAMELEN];
+};
+
int xt_compat_check_entry_offsets(const void *base, const char *elems,
unsigned int target_offset,
unsigned int next_offset)
@@ -675,9 +818,21 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems,
if (target_offset + t->u.target_size > next_offset)
return -EINVAL;
- if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
- COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
- return -EINVAL;
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
+ const struct compat_xt_standard_target *st = (const void *)t;
+
+ if (COMPAT_XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
+ return -EINVAL;
+
+ if (!verdict_ok(st->verdict))
+ return -EINVAL;
+ } else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
+ const struct compat_xt_error_target *et = (const void *)t;
+
+ if (!error_tg_ok(t->u.target_size, sizeof(*et),
+ et->errorname, sizeof(et->errorname)))
+ return -EINVAL;
+ }
/* compat_xt_entry match has less strict alignment requirements,
* otherwise they are identical. In case of padding differences
@@ -757,9 +912,21 @@ int xt_check_entry_offsets(const void *base,
if (target_offset + t->u.target_size > next_offset)
return -EINVAL;
- if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
- XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset)
- return -EINVAL;
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
+ const struct xt_standard_target *st = (const void *)t;
+
+ if (XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
+ return -EINVAL;
+
+ if (!verdict_ok(st->verdict))
+ return -EINVAL;
+ } else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
+ const struct xt_error_target *et = (const void *)t;
+
+ if (!error_tg_ok(t->u.target_size, sizeof(*et),
+ et->errorname, sizeof(et->errorname)))
+ return -EINVAL;
+ }
return xt_check_entry_match(elems, base + target_offset,
__alignof__(struct xt_entry_match));
@@ -775,6 +942,9 @@ EXPORT_SYMBOL(xt_check_entry_offsets);
*/
unsigned int *xt_alloc_entry_offsets(unsigned int size)
{
+ if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int))
+ return NULL;
+
return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO);
}
@@ -999,7 +1169,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
struct xt_table_info *info = NULL;
size_t sz = sizeof(*info) + size;
- if (sz < sizeof(*info))
+ if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE)
return NULL;
/* __GFP_NORETRY is not fully supported by kvmalloc but it should
@@ -1168,6 +1338,21 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
return 0;
}
+struct xt_counters *xt_counters_alloc(unsigned int counters)
+{
+ struct xt_counters *mem;
+
+ if (counters == 0 || counters > INT_MAX / sizeof(*mem))
+ return NULL;
+
+ counters *= sizeof(*mem);
+ if (counters > XT_MAX_TABLE_SIZE)
+ return NULL;
+
+ return vzalloc(counters);
+}
+EXPORT_SYMBOL(xt_counters_alloc);
+
struct xt_table_info *
xt_replace_table(struct xt_table *table,
unsigned int num_counters,
@@ -1699,7 +1884,9 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
* to fetch the real percpu counter.
*
* To speed up allocation and improve data locality, a 4kb block is
- * allocated.
+ * allocated. Freeing any counter may free an entire block, so all
+ * counters allocated using the same state must be freed at the same
+ * time.
*
* xt_percpu_counter_alloc_state contains the base address of the
* allocated page and the current sub-offset.
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 1ac6600bfafd..5ee859193783 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -132,7 +132,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
ret = -ENOMEM;
goto out_free_timer;
}
- info->timer->attr.attr.mode = S_IRUGO;
+ info->timer->attr.attr.mode = 0444;
info->timer->attr.show = idletimer_tg_show;
ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 141c295191f6..dec843cadf46 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -14,15 +14,21 @@
#include <linux/slab.h>
#include <net/gen_stats.h>
#include <net/netlink.h>
+#include <net/netns/generic.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_RATEEST.h>
#include <net/netfilter/xt_rateest.h>
-static DEFINE_MUTEX(xt_rateest_mutex);
-
#define RATEEST_HSIZE 16
-static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
+
+struct xt_rateest_net {
+ struct mutex hash_lock;
+ struct hlist_head hash[RATEEST_HSIZE];
+};
+
+static unsigned int xt_rateest_id;
+
static unsigned int jhash_rnd __read_mostly;
static unsigned int xt_rateest_hash(const char *name)
@@ -31,21 +37,23 @@ static unsigned int xt_rateest_hash(const char *name)
(RATEEST_HSIZE - 1);
}
-static void xt_rateest_hash_insert(struct xt_rateest *est)
+static void xt_rateest_hash_insert(struct xt_rateest_net *xn,
+ struct xt_rateest *est)
{
unsigned int h;
h = xt_rateest_hash(est->name);
- hlist_add_head(&est->list, &rateest_hash[h]);
+ hlist_add_head(&est->list, &xn->hash[h]);
}
-static struct xt_rateest *__xt_rateest_lookup(const char *name)
+static struct xt_rateest *__xt_rateest_lookup(struct xt_rateest_net *xn,
+ const char *name)
{
struct xt_rateest *est;
unsigned int h;
h = xt_rateest_hash(name);
- hlist_for_each_entry(est, &rateest_hash[h], list) {
+ hlist_for_each_entry(est, &xn->hash[h], list) {
if (strcmp(est->name, name) == 0) {
est->refcnt++;
return est;
@@ -55,20 +63,23 @@ static struct xt_rateest *__xt_rateest_lookup(const char *name)
return NULL;
}
-struct xt_rateest *xt_rateest_lookup(const char *name)
+struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name)
{
+ struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
struct xt_rateest *est;
- mutex_lock(&xt_rateest_mutex);
- est = __xt_rateest_lookup(name);
- mutex_unlock(&xt_rateest_mutex);
+ mutex_lock(&xn->hash_lock);
+ est = __xt_rateest_lookup(xn, name);
+ mutex_unlock(&xn->hash_lock);
return est;
}
EXPORT_SYMBOL_GPL(xt_rateest_lookup);
-void xt_rateest_put(struct xt_rateest *est)
+void xt_rateest_put(struct net *net, struct xt_rateest *est)
{
- mutex_lock(&xt_rateest_mutex);
+ struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+
+ mutex_lock(&xn->hash_lock);
if (--est->refcnt == 0) {
hlist_del(&est->list);
gen_kill_estimator(&est->rate_est);
@@ -78,7 +89,7 @@ void xt_rateest_put(struct xt_rateest *est)
*/
kfree_rcu(est, rcu);
}
- mutex_unlock(&xt_rateest_mutex);
+ mutex_unlock(&xn->hash_lock);
}
EXPORT_SYMBOL_GPL(xt_rateest_put);
@@ -98,6 +109,7 @@ xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
{
+ struct xt_rateest_net *xn = net_generic(par->net, xt_rateest_id);
struct xt_rateest_target_info *info = par->targinfo;
struct xt_rateest *est;
struct {
@@ -108,10 +120,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
- mutex_lock(&xt_rateest_mutex);
- est = __xt_rateest_lookup(info->name);
+ mutex_lock(&xn->hash_lock);
+ est = __xt_rateest_lookup(xn, info->name);
if (est) {
- mutex_unlock(&xt_rateest_mutex);
+ mutex_unlock(&xn->hash_lock);
/*
* If estimator parameters are specified, they must match the
* existing estimator.
@@ -119,7 +131,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
if ((!info->interval && !info->ewma_log) ||
(info->interval != est->params.interval ||
info->ewma_log != est->params.ewma_log)) {
- xt_rateest_put(est);
+ xt_rateest_put(par->net, est);
return -EINVAL;
}
info->est = est;
@@ -148,14 +160,14 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
goto err2;
info->est = est;
- xt_rateest_hash_insert(est);
- mutex_unlock(&xt_rateest_mutex);
+ xt_rateest_hash_insert(xn, est);
+ mutex_unlock(&xn->hash_lock);
return 0;
err2:
kfree(est);
err1:
- mutex_unlock(&xt_rateest_mutex);
+ mutex_unlock(&xn->hash_lock);
return ret;
}
@@ -163,7 +175,7 @@ static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
{
struct xt_rateest_target_info *info = par->targinfo;
- xt_rateest_put(info->est);
+ xt_rateest_put(par->net, info->est);
}
static struct xt_target xt_rateest_tg_reg __read_mostly = {
@@ -178,19 +190,46 @@ static struct xt_target xt_rateest_tg_reg __read_mostly = {
.me = THIS_MODULE,
};
-static int __init xt_rateest_tg_init(void)
+static __net_init int xt_rateest_net_init(struct net *net)
+{
+ struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+ int i;
+
+ mutex_init(&xn->hash_lock);
+ for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
+ INIT_HLIST_HEAD(&xn->hash[i]);
+ return 0;
+}
+
+static void __net_exit xt_rateest_net_exit(struct net *net)
{
- unsigned int i;
+ struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
+ WARN_ON_ONCE(!hlist_empty(&xn->hash[i]));
+}
- for (i = 0; i < ARRAY_SIZE(rateest_hash); i++)
- INIT_HLIST_HEAD(&rateest_hash[i]);
+static struct pernet_operations xt_rateest_net_ops = {
+ .init = xt_rateest_net_init,
+ .exit = xt_rateest_net_exit,
+ .id = &xt_rateest_id,
+ .size = sizeof(struct xt_rateest_net),
+};
+
+static int __init xt_rateest_tg_init(void)
+{
+ int err = register_pernet_subsys(&xt_rateest_net_ops);
+ if (err)
+ return err;
return xt_register_target(&xt_rateest_tg_reg);
}
static void __exit xt_rateest_tg_fini(void)
{
xt_unregister_target(&xt_rateest_tg_reg);
+ unregister_pernet_subsys(&xt_rateest_net_ops);
}
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 86b0580b2216..475957cfcf50 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -20,7 +20,7 @@
#include <linux/netfilter/xt_TEE.h>
struct xt_tee_priv {
- struct notifier_block notifier;
+ struct list_head list;
struct xt_tee_tginfo *tginfo;
int oif;
};
@@ -51,29 +51,35 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
}
#endif
+static DEFINE_MUTEX(priv_list_mutex);
+static LIST_HEAD(priv_list);
+
static int tee_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct xt_tee_priv *priv;
- priv = container_of(this, struct xt_tee_priv, notifier);
- switch (event) {
- case NETDEV_REGISTER:
- if (!strcmp(dev->name, priv->tginfo->oif))
- priv->oif = dev->ifindex;
- break;
- case NETDEV_UNREGISTER:
- if (dev->ifindex == priv->oif)
- priv->oif = -1;
- break;
- case NETDEV_CHANGENAME:
- if (!strcmp(dev->name, priv->tginfo->oif))
- priv->oif = dev->ifindex;
- else if (dev->ifindex == priv->oif)
- priv->oif = -1;
- break;
+ mutex_lock(&priv_list_mutex);
+ list_for_each_entry(priv, &priv_list, list) {
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (!strcmp(dev->name, priv->tginfo->oif))
+ priv->oif = dev->ifindex;
+ break;
+ case NETDEV_UNREGISTER:
+ if (dev->ifindex == priv->oif)
+ priv->oif = -1;
+ break;
+ case NETDEV_CHANGENAME:
+ if (!strcmp(dev->name, priv->tginfo->oif))
+ priv->oif = dev->ifindex;
+ else if (dev->ifindex == priv->oif)
+ priv->oif = -1;
+ break;
+ }
}
+ mutex_unlock(&priv_list_mutex);
return NOTIFY_DONE;
}
@@ -89,8 +95,6 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
if (info->oif[0]) {
- int ret;
-
if (info->oif[sizeof(info->oif)-1] != '\0')
return -EINVAL;
@@ -100,14 +104,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
priv->tginfo = info;
priv->oif = -1;
- priv->notifier.notifier_call = tee_netdev_event;
info->priv = priv;
- ret = register_netdevice_notifier(&priv->notifier);
- if (ret) {
- kfree(priv);
- return ret;
- }
+ mutex_lock(&priv_list_mutex);
+ list_add(&priv->list, &priv_list);
+ mutex_unlock(&priv_list_mutex);
} else
info->priv = NULL;
@@ -120,7 +121,9 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par)
struct xt_tee_tginfo *info = par->targinfo;
if (info->priv) {
- unregister_netdevice_notifier(&info->priv->notifier);
+ mutex_lock(&priv_list_mutex);
+ list_del(&info->priv->list);
+ mutex_unlock(&priv_list_mutex);
kfree(info->priv);
}
static_key_slow_dec(&xt_tee_enabled);
@@ -153,13 +156,29 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
#endif
};
+static struct notifier_block tee_netdev_notifier = {
+ .notifier_call = tee_netdev_event,
+};
+
static int __init tee_tg_init(void)
{
- return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+ int ret;
+
+ ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+ if (ret)
+ return ret;
+ ret = register_netdevice_notifier(&tee_netdev_notifier);
+ if (ret) {
+ xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+ return ret;
+ }
+
+ return 0;
}
static void __exit tee_tg_exit(void)
{
+ unregister_netdevice_notifier(&tee_netdev_notifier);
xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
}
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 0068688995c8..dfbdbb2fc0ed 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -60,13 +60,6 @@ xt_cluster_hash(const struct nf_conn *ct,
}
static inline bool
-xt_cluster_ipv6_is_multicast(const struct in6_addr *addr)
-{
- __be32 st = addr->s6_addr32[0];
- return ((st & htonl(0xFF000000)) == htonl(0xFF000000));
-}
-
-static inline bool
xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
{
bool is_multicast = false;
@@ -76,8 +69,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr);
break;
case NFPROTO_IPV6:
- is_multicast =
- xt_cluster_ipv6_is_multicast(&ipv6_hdr(skb)->daddr);
+ is_multicast = ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr);
break;
default:
WARN_ON(1);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b1b17b9353e1..6275106ccf50 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -67,8 +67,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
key[1] = zone->id;
}
- connections = nf_conncount_count(net, info->data, key,
- xt_family(par), tuple_ptr, zone);
+ connections = nf_conncount_count(net, info->data, key, tuple_ptr,
+ zone);
if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 809639ce6f5a..773da82190dc 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -36,9 +36,10 @@ MODULE_ALIAS("ipt_connmark");
MODULE_ALIAS("ip6t_connmark");
static unsigned int
-connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+connmark_tg_shift(struct sk_buff *skb,
+ const struct xt_connmark_tginfo1 *info,
+ u8 shift_bits, u8 shift_dir)
{
- const struct xt_connmark_tginfo1 *info = par->targinfo;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
u_int32_t newmark;
@@ -50,6 +51,10 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
switch (info->mode) {
case XT_CONNMARK_SET:
newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
+ if (shift_dir == D_SHIFT_RIGHT)
+ newmark >>= shift_bits;
+ else
+ newmark <<= shift_bits;
if (ct->mark != newmark) {
ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, ct);
@@ -57,7 +62,11 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
break;
case XT_CONNMARK_SAVE:
newmark = (ct->mark & ~info->ctmask) ^
- (skb->mark & info->nfmask);
+ (skb->mark & info->nfmask);
+ if (shift_dir == D_SHIFT_RIGHT)
+ newmark >>= shift_bits;
+ else
+ newmark <<= shift_bits;
if (ct->mark != newmark) {
ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, ct);
@@ -65,14 +74,34 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
break;
case XT_CONNMARK_RESTORE:
newmark = (skb->mark & ~info->nfmask) ^
- (ct->mark & info->ctmask);
+ (ct->mark & info->ctmask);
+ if (shift_dir == D_SHIFT_RIGHT)
+ newmark >>= shift_bits;
+ else
+ newmark <<= shift_bits;
skb->mark = newmark;
break;
}
-
return XT_CONTINUE;
}
+static unsigned int
+connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_connmark_tginfo1 *info = par->targinfo;
+
+ return connmark_tg_shift(skb, info, 0, 0);
+}
+
+static unsigned int
+connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_connmark_tginfo2 *info = par->targinfo;
+
+ return connmark_tg_shift(skb, (const struct xt_connmark_tginfo1 *)info,
+ info->shift_bits, info->shift_dir);
+}
+
static int connmark_tg_check(const struct xt_tgchk_param *par)
{
int ret;
@@ -119,15 +148,27 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
nf_ct_netns_put(par->net, par->family);
}
-static struct xt_target connmark_tg_reg __read_mostly = {
- .name = "CONNMARK",
- .revision = 1,
- .family = NFPROTO_UNSPEC,
- .checkentry = connmark_tg_check,
- .target = connmark_tg,
- .targetsize = sizeof(struct xt_connmark_tginfo1),
- .destroy = connmark_tg_destroy,
- .me = THIS_MODULE,
+static struct xt_target connmark_tg_reg[] __read_mostly = {
+ {
+ .name = "CONNMARK",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_tg_check,
+ .target = connmark_tg,
+ .targetsize = sizeof(struct xt_connmark_tginfo1),
+ .destroy = connmark_tg_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CONNMARK",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_tg_check,
+ .target = connmark_tg_v2,
+ .targetsize = sizeof(struct xt_connmark_tginfo2),
+ .destroy = connmark_tg_destroy,
+ .me = THIS_MODULE,
+ }
};
static struct xt_match connmark_mt_reg __read_mostly = {
@@ -145,12 +186,14 @@ static int __init connmark_mt_init(void)
{
int ret;
- ret = xt_register_target(&connmark_tg_reg);
+ ret = xt_register_targets(connmark_tg_reg,
+ ARRAY_SIZE(connmark_tg_reg));
if (ret < 0)
return ret;
ret = xt_register_match(&connmark_mt_reg);
if (ret < 0) {
- xt_unregister_target(&connmark_tg_reg);
+ xt_unregister_targets(connmark_tg_reg,
+ ARRAY_SIZE(connmark_tg_reg));
return ret;
}
return 0;
@@ -159,7 +202,7 @@ static int __init connmark_mt_init(void)
static void __exit connmark_mt_exit(void)
{
xt_unregister_match(&connmark_mt_reg);
- xt_unregister_target(&connmark_tg_reg);
+ xt_unregister_target(connmark_tg_reg);
}
module_init(connmark_mt_init);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 66f5aca62a08..0cd73567e7ff 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -534,8 +534,7 @@ static u64 user2rate_bytes(u32 user)
u64 r;
r = user ? U32_MAX / user : U32_MAX;
- r = (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
- return r;
+ return (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
}
static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
@@ -917,8 +916,9 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
struct hashlimit_cfg3 cfg = {};
int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
@@ -935,8 +935,9 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
struct hashlimit_cfg3 cfg = {};
int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
@@ -950,9 +951,11 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
static int hashlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ int ret;
- if (info->name[sizeof(info->name) - 1] != '\0')
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
info->name, 3);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 55d18cd67635..9f098ecb2449 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -46,7 +46,7 @@ MODULE_ALIAS("ip6t_limit");
See Alexey's formal explanation in net/sched/sch_tbf.c.
- To get the maxmum range, we multiply by this factor (ie. you get N
+ To get the maximum range, we multiply by this factor (ie. you get N
credits per jiffy). We want to allow a rate as low as 1 per day
(slowest userspace tool allows), which means
CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index c8674deed4eb..6b56f4170860 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -28,7 +28,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
nfnl_acct_update(skb, info->nfacct);
- overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct);
+ overquota = nfnl_acct_overquota(xt_net(par), info->nfacct);
return overquota == NFACCT_UNDERQUOTA ? false : true;
}
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 755d2f6693a2..bf77326861af 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -95,13 +95,13 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
}
ret = -ENOENT;
- est1 = xt_rateest_lookup(info->name1);
+ est1 = xt_rateest_lookup(par->net, info->name1);
if (!est1)
goto err1;
est2 = NULL;
if (info->flags & XT_RATEEST_MATCH_REL) {
- est2 = xt_rateest_lookup(info->name2);
+ est2 = xt_rateest_lookup(par->net, info->name2);
if (!est2)
goto err2;
}
@@ -111,7 +111,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
return 0;
err2:
- xt_rateest_put(est1);
+ xt_rateest_put(par->net, est1);
err1:
return ret;
}
@@ -120,9 +120,9 @@ static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
{
struct xt_rateest_match_info *info = par->matchinfo;
- xt_rateest_put(info->est1);
+ xt_rateest_put(par->net, info->est1);
if (info->est2)
- xt_rateest_put(info->est2);
+ xt_rateest_put(par->net, info->est2);
}
static struct xt_match xt_rateest_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 6d232d18faff..9bbfc17ce3ec 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -51,8 +51,8 @@ static unsigned int ip_list_gid __read_mostly;
module_param(ip_list_tot, uint, 0400);
module_param(ip_list_hash_size, uint, 0400);
module_param(ip_list_perms, uint, 0400);
-module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR);
-module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR);
+module_param(ip_list_uid, uint, 0644);
+module_param(ip_list_gid, uint, 0644);
MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
@@ -361,9 +361,9 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
return -EINVAL;
}
- if (info->name[0] == '\0' ||
- strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
- return -EINVAL;
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot)
nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 423293ee57c2..be1feddadcf0 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -21,6 +21,7 @@ MODULE_DESCRIPTION("Xtables: string-based matching");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_string");
MODULE_ALIAS("ip6t_string");
+MODULE_ALIAS("ebt_string");
static bool
string_mt(const struct sk_buff *skb, struct xt_action_param *par)
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 0160f505e337..c13bcd0ab491 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -9,6 +9,9 @@
* This file is distributed under the terms of the GNU General Public
* License (GPL). Copies of the GPL can be obtained from gnu.org/gpl.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -266,13 +269,11 @@ static int __init time_mt_init(void)
int minutes = sys_tz.tz_minuteswest;
if (minutes < 0) /* east of Greenwich */
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel timezone is +%02d%02d\n",
- -minutes / 60, -minutes % 60);
+ pr_info("kernel timezone is +%02d%02d\n",
+ -minutes / 60, -minutes % 60);
else /* west of Greenwich */
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel timezone is -%02d%02d\n",
- minutes / 60, minutes % 60);
+ pr_info("kernel timezone is -%02d%02d\n",
+ minutes / 60, minutes % 60);
return xt_register_match(&xt_time_mt_reg);
}
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 22dc1b9d6362..c070dfc0190a 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1472,6 +1472,16 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
iface = rcu_dereference(netlbl_unlhsh_def);
if (iface == NULL || !iface->valid)
goto unlabel_getattr_nolabel;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ /* When resolving a fallback label, check the sk_buff version as
+ * it is possible (e.g. SCTP) to have family = PF_INET6 while
+ * receiving ip_hdr(skb)->version = 4.
+ */
+ if (family == PF_INET6 && ip_hdr(skb)->version == 4)
+ family = PF_INET;
+#endif /* IPv6 */
+
switch (family) {
case PF_INET: {
struct iphdr *hdr4;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 07e8478068f0..55342c4d5cec 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1085,6 +1085,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
if (addr->sa_family != AF_NETLINK)
return -EINVAL;
+ if (alen < sizeof(struct sockaddr_nl))
+ return -EINVAL;
+
if ((nladdr->nl_groups || nladdr->nl_pid) &&
!netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
return -EPERM;
@@ -1105,7 +1108,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
}
static int netlink_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -1113,7 +1116,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_family = AF_NETLINK;
nladdr->nl_pad = 0;
- *addr_len = sizeof(*nladdr);
if (peer) {
nladdr->nl_pid = nlk->dst_portid;
@@ -1124,7 +1126,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
netlink_unlock_table();
}
- return 0;
+ return sizeof(*nladdr);
}
static int netlink_ioctl(struct socket *sock, unsigned int cmd,
@@ -1842,6 +1844,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_namelen) {
err = -EINVAL;
+ if (msg->msg_namelen < sizeof(struct sockaddr_nl))
+ goto out;
if (addr->nl_family != AF_NETLINK)
goto out;
dst_portid = addr->nl_pid;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 6f02499ef007..b9ce82c9440f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1106,7 +1106,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
if (!err)
delivered = true;
else if (err != -ESRCH)
- goto error;
+ return err;
return delivered ? 0 : -ESRCH;
error:
kfree_skb(skb);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 9ba30c63be3d..4221d98a314b 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -829,11 +829,12 @@ out_release:
}
static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
+ int uaddr_len;
memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
@@ -848,16 +849,16 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
sax->fsa_ax25.sax25_call = nr->user_addr;
memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater));
sax->fsa_digipeater[0] = nr->dest_addr;
- *uaddr_len = sizeof(struct full_sockaddr_ax25);
+ uaddr_len = sizeof(struct full_sockaddr_ax25);
} else {
sax->fsa_ax25.sax25_family = AF_NETROM;
sax->fsa_ax25.sax25_ndigis = 0;
sax->fsa_ax25.sax25_call = nr->source_addr;
- *uaddr_len = sizeof(struct sockaddr_ax25);
+ uaddr_len = sizeof(struct sockaddr_ax25);
}
release_sock(sk);
- return 0;
+ return uaddr_len;
}
int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
@@ -1449,9 +1450,9 @@ static int __init nr_proto_init(void)
nr_loopback_init();
- proc_create("nr", S_IRUGO, init_net.proc_net, &nr_info_fops);
- proc_create("nr_neigh", S_IRUGO, init_net.proc_net, &nr_neigh_fops);
- proc_create("nr_nodes", S_IRUGO, init_net.proc_net, &nr_nodes_fops);
+ proc_create("nr", 0444, init_net.proc_net, &nr_info_fops);
+ proc_create("nr_neigh", 0444, init_net.proc_net, &nr_neigh_fops);
+ proc_create("nr_nodes", 0444, init_net.proc_net, &nr_nodes_fops);
out:
return rc;
fail:
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 376040092142..ea0c0c6f1874 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -497,7 +497,7 @@ error:
}
static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -510,7 +510,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
llcp_sock->dsap, llcp_sock->ssap);
memset(llcp_addr, 0, sizeof(*llcp_addr));
- *len = sizeof(struct sockaddr_nfc_llcp);
lock_sock(sk);
if (!llcp_sock->dev) {
@@ -528,7 +527,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
llcp_addr->service_name_len);
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_nfc_llcp);
}
static inline __poll_t llcp_accept_poll(struct sock *parent)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ef38e5aecd28..015e24e08909 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2363,10 +2363,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
- rtnl_lock();
+ down_read(&net_rwsem);
for_each_net(net)
list_vports_from_net(net, dnet, &head);
- rtnl_unlock();
+ up_read(&net_rwsem);
/* Detach all vports from given namespace. */
list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 04b94281a30b..b891a91577f8 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -242,14 +242,20 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+ if (band->rate == 0) {
+ err = -EINVAL;
+ goto exit_free_meter;
+ }
+
band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
/* Figure out max delta_t that is enough to fill any bucket.
* Keep max_delta_t size to the bucket units:
* pkts => 1/1000 packets, kilobits => bits.
+ *
+ * Start with a full bucket.
*/
- band_max_delta_t = (band->burst_size + band->rate) * 1000;
- /* Start with a full bucket. */
- band->bucket = band_max_delta_t;
+ band->bucket = (band->burst_size + band->rate) * 1000;
+ band_max_delta_t = band->bucket / band->rate;
if (band_max_delta_t > meter->max_delta_t)
meter->max_delta_t = band_max_delta_t;
band++;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index b6c8524032a0..f81c1d0ddff4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
return 0;
}
-static unsigned int packet_length(const struct sk_buff *skb,
- struct net_device *dev)
+static int packet_length(const struct sk_buff *skb,
+ struct net_device *dev)
{
- unsigned int length = skb->len - dev->hard_header_len;
+ int length = skb->len - dev->hard_header_len;
if (!skb_vlan_tag_present(skb) &&
eth_type_vlan(skb->protocol))
@@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb,
* account for 802.1ad. e.g. is_skb_forwardable().
*/
- return length;
+ return length > 0 ? length : 0;
}
void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e0f3f4aeeb4f..616cb9c18f88 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3409,7 +3409,7 @@ out:
}
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct net_device *dev;
struct sock *sk = sock->sk;
@@ -3424,13 +3424,12 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
if (dev)
strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
- *uaddr_len = sizeof(*uaddr);
- return 0;
+ return sizeof(*uaddr);
}
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct net_device *dev;
struct sock *sk = sock->sk;
@@ -3455,9 +3454,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
sll->sll_halen = 0;
}
rcu_read_unlock();
- *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
- return 0;
+ return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
}
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index fffcd69f63ff..f9b40e6a18a5 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -326,7 +326,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
}
static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
@@ -337,8 +337,7 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
pn_sockaddr_set_object((struct sockaddr_pn *)addr,
pn->sobject);
- *sockaddr_len = sizeof(struct sockaddr_pn);
- return 0;
+ return sizeof(struct sockaddr_pn);
}
static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5fb3929e3d7d..b33e5aeb4c06 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -893,7 +893,7 @@ static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
}
static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
struct qrtr_sock *ipc = qrtr_sk(sock->sk);
struct sockaddr_qrtr qaddr;
@@ -912,12 +912,11 @@ static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
}
release_sock(sk);
- *len = sizeof(qaddr);
qaddr.sq_family = AF_QIPCRTR;
memcpy(saddr, &qaddr, sizeof(qaddr));
- return 0;
+ return sizeof(qaddr);
}
static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
index 50615d5efac1..9cf089b9754e 100644
--- a/net/qrtr/smd.c
+++ b/net/qrtr/smd.c
@@ -114,5 +114,6 @@ static struct rpmsg_driver qcom_smd_qrtr_driver = {
module_rpmsg_driver(qcom_smd_qrtr_driver);
+MODULE_ALIAS("rpmsg:IPCRTR");
MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
MODULE_LICENSE("GPL v2");
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 744c637c86b0..ab751a150f70 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
rds_send_drop_to(rs, NULL);
rds_rdma_drop_keys(rs);
rds_notify_queue_get(rs, NULL);
+ rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue);
spin_lock_bh(&rds_sock_lock);
list_del_init(&rs->rs_item);
@@ -110,7 +111,7 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
}
static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
@@ -131,8 +132,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_family = AF_INET;
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
/*
@@ -145,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
- * update, or a RDMA completion).
+ * update, or a RDMA completion, or a MSG_ZEROCOPY completion).
*
* EPOLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
@@ -179,10 +179,13 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue) ||
- !list_empty(&rs->rs_notify_queue))
+ !list_empty(&rs->rs_notify_queue) ||
+ !list_empty(&rs->rs_zcookie_queue.zcookie_head))
mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
mask |= (EPOLLOUT | EPOLLWRNORM);
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
/* clear state any time we wake a seen-congested socket */
@@ -512,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
INIT_LIST_HEAD(&rs->rs_recv_queue);
INIT_LIST_HEAD(&rs->rs_notify_queue);
INIT_LIST_HEAD(&rs->rs_cong_list);
+ rds_message_zcopy_queue_init(&rs->rs_zcookie_queue);
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 2da3176bf792..abef75da89a7 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
+ u64 *buffer,
size_t item_len)
{
- uint64_t buffer[(item_len + 7) / 8];
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
@@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_conn_path *, void *),
+ u64 *buffer,
size_t item_len)
{
- u64 buffer[(item_len + 7) / 8];
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
@@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8];
+
rds_walk_conn_path_info(sock, len, iter, lens,
rds_conn_info_visitor,
+ buffer,
sizeof(struct rds_info_connection));
}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 50a88f3e7e39..02deee29e7f1 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8];
+
rds_for_each_conn_info(sock, len, iter, lens,
rds_ib_conn_info_visitor,
+ buffer,
sizeof(struct rds_info_rdma_connection));
}
diff --git a/net/rds/message.c b/net/rds/message.c
index 4318cc9b78f7..a35f76971984 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -33,6 +33,9 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/errqueue.h>
#include "rds.h"
@@ -45,7 +48,6 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
[RDS_EXTHDR_GEN_NUM] = sizeof(u32),
};
-
void rds_message_addref(struct rds_message *rm)
{
rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
@@ -53,20 +55,107 @@ void rds_message_addref(struct rds_message *rm)
}
EXPORT_SYMBOL_GPL(rds_message_addref);
+static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
+{
+ struct rds_zcopy_cookies *ck = &info->zcookies;
+ int ncookies = ck->num;
+
+ if (ncookies == RDS_MAX_ZCOOKIES)
+ return false;
+ ck->cookies[ncookies] = cookie;
+ ck->num = ++ncookies;
+ return true;
+}
+
+static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
+{
+ return container_of(znotif, struct rds_msg_zcopy_info, znotif);
+}
+
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q)
+{
+ unsigned long flags;
+ LIST_HEAD(copy);
+ struct rds_msg_zcopy_info *info, *tmp;
+
+ spin_lock_irqsave(&q->lock, flags);
+ list_splice(&q->zcookie_head, &copy);
+ INIT_LIST_HEAD(&q->zcookie_head);
+ spin_unlock_irqrestore(&q->lock, flags);
+
+ list_for_each_entry_safe(info, tmp, &copy, rs_zcookie_next) {
+ list_del(&info->rs_zcookie_next);
+ kfree(info);
+ }
+}
+
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+ struct rds_znotifier *znotif)
+{
+ struct rds_msg_zcopy_info *info;
+ struct rds_msg_zcopy_queue *q;
+ u32 cookie = znotif->z_cookie;
+ struct rds_zcopy_cookies *ck;
+ struct list_head *head;
+ unsigned long flags;
+
+ mm_unaccount_pinned_pages(&znotif->z_mmp);
+ q = &rs->rs_zcookie_queue;
+ spin_lock_irqsave(&q->lock, flags);
+ head = &q->zcookie_head;
+ if (!list_empty(head)) {
+ info = list_entry(head, struct rds_msg_zcopy_info,
+ rs_zcookie_next);
+ if (info && rds_zcookie_add(info, cookie)) {
+ spin_unlock_irqrestore(&q->lock, flags);
+ kfree(rds_info_from_znotifier(znotif));
+ /* caller invokes rds_wake_sk_sleep() */
+ return;
+ }
+ }
+
+ info = rds_info_from_znotifier(znotif);
+ ck = &info->zcookies;
+ memset(ck, 0, sizeof(*ck));
+ WARN_ON(!rds_zcookie_add(info, cookie));
+ list_add_tail(&q->zcookie_head, &info->rs_zcookie_next);
+
+ spin_unlock_irqrestore(&q->lock, flags);
+ /* caller invokes rds_wake_sk_sleep() */
+}
+
/*
* This relies on dma_map_sg() not touching sg[].page during merging.
*/
static void rds_message_purge(struct rds_message *rm)
{
- unsigned long i;
+ unsigned long i, flags;
+ bool zcopy = false;
if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
return;
+ spin_lock_irqsave(&rm->m_rs_lock, flags);
+ if (rm->m_rs) {
+ struct rds_sock *rs = rm->m_rs;
+
+ if (rm->data.op_mmp_znotifier) {
+ zcopy = true;
+ rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+ rds_wake_sk_sleep(rs);
+ rm->data.op_mmp_znotifier = NULL;
+ }
+ sock_put(rds_rs_to_sk(rs));
+ rm->m_rs = NULL;
+ }
+ spin_unlock_irqrestore(&rm->m_rs_lock, flags);
+
for (i = 0; i < rm->data.op_nents; i++) {
- rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
/* XXX will have to put_page for page refs */
- __free_page(sg_page(&rm->data.op_sg[i]));
+ if (!zcopy)
+ __free_page(sg_page(&rm->data.op_sg[i]));
+ else
+ put_page(sg_page(&rm->data.op_sg[i]));
}
rm->data.op_nents = 0;
@@ -266,12 +355,13 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
return rm;
}
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
+static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
{
- unsigned long to_copy, nbytes;
- unsigned long sg_off;
struct scatterlist *sg;
int ret = 0;
+ int length = iov_iter_count(from);
+ int total_copied = 0;
+ struct rds_msg_zcopy_info *info;
rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
@@ -279,8 +369,67 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
* now allocate and copy in the data payload.
*/
sg = rm->data.op_sg;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&info->rs_zcookie_next);
+ rm->data.op_mmp_znotifier = &info->znotif;
+ if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+ length)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ while (iov_iter_count(from)) {
+ struct page *pages;
+ size_t start;
+ ssize_t copied;
+
+ copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+ 1, &start);
+ if (copied < 0) {
+ struct mmpin *mmp;
+ int i;
+
+ for (i = 0; i < rm->data.op_nents; i++)
+ put_page(sg_page(&rm->data.op_sg[i]));
+ mmp = &rm->data.op_mmp_znotifier->z_mmp;
+ mm_unaccount_pinned_pages(mmp);
+ ret = -EFAULT;
+ goto err;
+ }
+ total_copied += copied;
+ iov_iter_advance(from, copied);
+ length -= copied;
+ sg_set_page(sg, pages, copied, start);
+ rm->data.op_nents++;
+ sg++;
+ }
+ WARN_ON_ONCE(length != 0);
+ return ret;
+err:
+ kfree(info);
+ rm->data.op_mmp_znotifier = NULL;
+ return ret;
+}
+
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+ bool zcopy)
+{
+ unsigned long to_copy, nbytes;
+ unsigned long sg_off;
+ struct scatterlist *sg;
+ int ret = 0;
+
+ rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
+
+ /* now allocate and copy in the data payload. */
+ sg = rm->data.op_sg;
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
+ if (zcopy)
+ return rds_message_zcopy_from_user(rm, from);
+
while (iov_iter_count(from)) {
if (!sg_page(sg)) {
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 7301b9b01890..b04c333d9d1c 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -356,6 +356,30 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
#define RDS_MSG_PAGEVEC 7
#define RDS_MSG_FLUSH 8
+struct rds_znotifier {
+ struct mmpin z_mmp;
+ u32 z_cookie;
+};
+
+struct rds_msg_zcopy_info {
+ struct list_head rs_zcookie_next;
+ union {
+ struct rds_znotifier znotif;
+ struct rds_zcopy_cookies zcookies;
+ };
+};
+
+struct rds_msg_zcopy_queue {
+ struct list_head zcookie_head;
+ spinlock_t lock; /* protects zcookie_head queue */
+};
+
+static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q)
+{
+ spin_lock_init(&q->lock);
+ INIT_LIST_HEAD(&q->zcookie_head);
+}
+
struct rds_message {
refcount_t m_refcount;
struct list_head m_sock_item;
@@ -436,6 +460,7 @@ struct rds_message {
unsigned int op_count;
unsigned int op_dmasg;
unsigned int op_dmaoff;
+ struct rds_znotifier *op_mmp_znotifier;
struct scatterlist *op_sg;
} data;
};
@@ -589,6 +614,7 @@ struct rds_sock {
/* Socket receive path trace points*/
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+ struct rds_msg_zcopy_queue rs_zcookie_queue;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -709,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
+ u64 *buffer,
size_t item_len);
__printf(2, 3)
@@ -771,7 +798,8 @@ rds_conn_connecting(struct rds_connection *conn)
/* message.c */
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+ bool zcopy);
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
__be16 dport, u64 seq);
@@ -786,6 +814,7 @@ void rds_message_addref(struct rds_message *rm);
void rds_message_put(struct rds_message *rm);
void rds_message_wait(struct rds_message *rm);
void rds_message_unmapped(struct rds_message *rm);
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info);
static inline void rds_message_make_checksum(struct rds_header *hdr)
{
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b25bcfe411ca..de50e2126e40 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -577,6 +577,41 @@ out:
return ret;
}
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+ struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue;
+ struct rds_msg_zcopy_info *info = NULL;
+ struct rds_zcopy_cookies *done;
+ unsigned long flags;
+
+ if (!msg->msg_control)
+ return false;
+
+ if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+ msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+ return false;
+
+ spin_lock_irqsave(&q->lock, flags);
+ if (!list_empty(&q->zcookie_head)) {
+ info = list_entry(q->zcookie_head.next,
+ struct rds_msg_zcopy_info, rs_zcookie_next);
+ list_del(&info->rs_zcookie_next);
+ }
+ spin_unlock_irqrestore(&q->lock, flags);
+ if (!info)
+ return false;
+ done = &info->zcookies;
+ if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+ done)) {
+ spin_lock_irqsave(&q->lock, flags);
+ list_add(&info->rs_zcookie_next, &q->zcookie_head);
+ spin_unlock_irqrestore(&q->lock, flags);
+ return false;
+ }
+ kfree(info);
+ return true;
+}
+
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int msg_flags)
{
@@ -594,6 +629,8 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (msg_flags & MSG_OOB)
goto out;
+ if (msg_flags & MSG_ERRQUEUE)
+ return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR);
while (1) {
/* If there are pending notifications, do those - and nothing else */
@@ -609,7 +646,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (!rds_next_incoming(rs, &inc)) {
if (nonblock) {
- ret = -EAGAIN;
+ bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+ ret = reaped ? 0 : -EAGAIN;
break;
}
@@ -658,6 +697,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
ret = -EFAULT;
goto out;
}
+ rds_recvmsg_zcookie(rs, msg);
rds_stats_inc(s_recv_delivered);
diff --git a/net/rds/send.c b/net/rds/send.c
index b1b0022b8370..94c7f74909be 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -649,7 +649,6 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status)
rm->rdma.op_notifier = NULL;
}
was_on_sock = 1;
- rm->m_rs = NULL;
}
spin_unlock(&rs->rs_lock);
@@ -756,9 +755,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
*/
if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
spin_unlock_irqrestore(&cp->cp_lock, flags);
- spin_lock_irqsave(&rm->m_rs_lock, flags);
- rm->m_rs = NULL;
- spin_unlock_irqrestore(&rm->m_rs_lock, flags);
continue;
}
list_del_init(&rm->m_conn_item);
@@ -774,7 +770,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
spin_unlock(&rs->rs_lock);
- rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
@@ -798,7 +793,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
spin_unlock(&rs->rs_lock);
- rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
@@ -849,6 +843,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
rds_message_addref(rm);
+ sock_hold(rds_rs_to_sk(rs));
rm->m_rs = rs;
/* The code ordering is a little weird, but we're
@@ -880,12 +875,13 @@ out:
* rds_message is getting to be quite complicated, and we'd like to allocate
* it all in one go. This figures out how big it needs to be up front.
*/
-static int rds_rm_size(struct msghdr *msg, int data_len)
+static int rds_rm_size(struct msghdr *msg, int num_sgs)
{
struct cmsghdr *cmsg;
int size = 0;
int cmsg_groups = 0;
int retval;
+ bool zcopy_cookie = false;
for_each_cmsghdr(cmsg, msg) {
if (!CMSG_OK(msg, cmsg))
@@ -904,6 +900,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
break;
+ case RDS_CMSG_ZCOPY_COOKIE:
+ zcopy_cookie = true;
+ /* fall through */
+
case RDS_CMSG_RDMA_DEST:
case RDS_CMSG_RDMA_MAP:
cmsg_groups |= 2;
@@ -924,7 +924,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
}
- size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
+ if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
+ return -EINVAL;
+
+ size += num_sgs * sizeof(struct scatterlist);
/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
if (cmsg_groups == 3)
@@ -933,6 +936,19 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
return size;
}
+static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
+ struct cmsghdr *cmsg)
+{
+ u32 *cookie;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
+ !rm->data.op_mmp_znotifier)
+ return -EINVAL;
+ cookie = CMSG_DATA(cmsg);
+ rm->data.op_mmp_znotifier->z_cookie = *cookie;
+ return 0;
+}
+
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
struct msghdr *msg, int *allocated_mr)
{
@@ -975,6 +991,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
ret = rds_cmsg_atomic(rs, rm, cmsg);
break;
+ case RDS_CMSG_ZCOPY_COOKIE:
+ ret = rds_cmsg_zcopy(rs, rm, cmsg);
+ break;
+
default:
return -EINVAL;
}
@@ -997,10 +1017,15 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
if (conn->c_npaths == 0 && hash != 0) {
rds_send_ping(conn, 0);
- if (conn->c_npaths == 0) {
- wait_event_interruptible(conn->c_hs_waitq,
- (conn->c_npaths != 0));
- }
+ /* The underlying connection is not up yet. Need to wait
+ * until it is up to be sure that the non-zero c_path can be
+ * used. But if we are interrupted, we have to use the zero
+ * c_path in case the connection ends up being non-MP capable.
+ */
+ if (conn->c_npaths == 0)
+ if (wait_event_interruptible(conn->c_hs_waitq,
+ conn->c_npaths != 0))
+ hash = 0;
if (conn->c_npaths == 1)
hash = 0;
}
@@ -1045,10 +1070,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
long timeo = sock_sndtimeo(sk, nonblock);
struct rds_conn_path *cpath;
size_t total_payload_len = payload_len, rdma_payload_len = 0;
+ bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
+ sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
+ int num_sgs = ceil(payload_len, PAGE_SIZE);
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
- if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
+ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -1092,8 +1120,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
goto out;
}
+ if (zcopy) {
+ if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
+ }
/* size of rm including all sgs */
- ret = rds_rm_size(msg, payload_len);
+ ret = rds_rm_size(msg, num_sgs);
if (ret < 0)
goto out;
@@ -1105,12 +1140,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Attach data to the rm */
if (payload_len) {
- rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+ rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
if (!rm->data.op_sg) {
ret = -ENOMEM;
goto out;
}
- ret = rds_message_copy_from_user(rm, &msg->msg_iter);
+ ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
if (ret)
goto out;
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 44c4652721af..351a28474667 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -227,7 +227,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
struct rds_tcp_connection *tc;
unsigned long flags;
struct sockaddr_in sin;
- int sinlen;
struct socket *sock;
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
@@ -239,12 +238,10 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
sock = tc->t_sock;
if (sock) {
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 0);
+ sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
tsinfo.local_addr = sin.sin_addr.s_addr;
tsinfo.local_port = sin.sin_port;
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 1);
+ sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
tsinfo.peer_addr = sin.sin_addr.s_addr;
tsinfo.peer_port = sin.sin_port;
}
@@ -275,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
if (!tc->t_tcp_node_detached)
list_del(&tc->t_tcp_node);
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
kmem_cache_free(rds_tcp_conn_slab, tc);
}
@@ -311,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
rdsdebug("rds_conn_path [%d] tc %p\n", i,
conn->c_path[i].cp_transport_data);
}
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
tc = conn->c_path[i].cp_transport_data;
tc->t_tcp_node_detached = false;
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
fail:
if (ret) {
for (j = 0; j < i; j++)
@@ -487,39 +485,6 @@ fail:
return err;
}
-static void __net_exit rds_tcp_exit_net(struct net *net)
-{
- struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
-
- if (rtn->rds_tcp_sysctl)
- unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
-
- if (net != &init_net && rtn->ctl_table)
- kfree(rtn->ctl_table);
-
- /* If rds_tcp_exit_net() is called as a result of netns deletion,
- * the rds_tcp_kill_sock() device notifier would already have cleaned
- * up the listen socket, thus there is no work to do in this function.
- *
- * If rds_tcp_exit_net() is called as a result of module unload,
- * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
- * we do need to clean up the listen socket here.
- */
- if (rtn->rds_tcp_listen_sock) {
- struct socket *lsock = rtn->rds_tcp_listen_sock;
-
- rtn->rds_tcp_listen_sock = NULL;
- rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
- }
-}
-
-static struct pernet_operations rds_tcp_net_ops = {
- .init = rds_tcp_init_net,
- .exit = rds_tcp_exit_net,
- .id = &rds_tcp_netid,
- .size = sizeof(struct rds_tcp_net),
-};
-
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
@@ -529,7 +494,7 @@ static void rds_tcp_kill_sock(struct net *net)
rtn->rds_tcp_listen_sock = NULL;
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -542,45 +507,42 @@ static void rds_tcp_kill_sock(struct net *net)
tc->t_tcp_node_detached = true;
}
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_conn_destroy(tc->t_cpath->cp_conn);
}
-void *rds_tcp_listen_sock_def_readable(struct net *net)
+static void __net_exit rds_tcp_exit_net(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
- struct socket *lsock = rtn->rds_tcp_listen_sock;
- if (!lsock)
- return NULL;
+ rds_tcp_kill_sock(net);
- return lsock->sk->sk_user_data;
+ if (rtn->rds_tcp_sysctl)
+ unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+
+ if (net != &init_net && rtn->ctl_table)
+ kfree(rtn->ctl_table);
}
-static int rds_tcp_dev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static struct pernet_operations rds_tcp_net_ops = {
+ .init = rds_tcp_init_net,
+ .exit = rds_tcp_exit_net,
+ .id = &rds_tcp_netid,
+ .size = sizeof(struct rds_tcp_net),
+};
+
+void *rds_tcp_listen_sock_def_readable(struct net *net)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
- /* rds-tcp registers as a pernet subys, so the ->exit will only
- * get invoked after network acitivity has quiesced. We need to
- * clean up all sockets to quiesce network activity, and use
- * the unregistration of the per-net loopback device as a trigger
- * to start that cleanup.
- */
- if (event == NETDEV_UNREGISTER_FINAL &&
- dev->ifindex == LOOPBACK_IFINDEX)
- rds_tcp_kill_sock(dev_net(dev));
+ if (!lsock)
+ return NULL;
- return NOTIFY_DONE;
+ return lsock->sk->sk_user_data;
}
-static struct notifier_block rds_tcp_dev_notifier = {
- .notifier_call = rds_tcp_dev_event,
- .priority = -10, /* must be called after other network notifiers */
-};
-
/* when sysctl is used to modify some kernel socket parameters,this
* function resets the RDS connections in that netns so that we can
* restart with new parameters. The assumption is that such reset
@@ -590,7 +552,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -600,7 +562,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
/* reconnect with new parameters */
rds_conn_path_drop(tc->t_cpath, false);
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
}
static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
@@ -626,9 +588,7 @@ static void rds_tcp_exit(void)
rds_tcp_set_unloading();
synchronize_rcu();
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
- unregister_pernet_subsys(&rds_tcp_net_ops);
- if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
- pr_warn("could not unregister rds_tcp_dev_notifier\n");
+ unregister_pernet_device(&rds_tcp_net_ops);
rds_tcp_destroy_conns();
rds_trans_unregister(&rds_tcp_transport);
rds_tcp_recv_exit();
@@ -652,24 +612,15 @@ static int rds_tcp_init(void)
if (ret)
goto out_slab;
- ret = register_pernet_subsys(&rds_tcp_net_ops);
+ ret = register_pernet_device(&rds_tcp_net_ops);
if (ret)
goto out_recv;
- ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
- if (ret) {
- pr_warn("could not register rds_tcp_dev_notifier\n");
- goto out_pernet;
- }
-
rds_trans_register(&rds_tcp_transport);
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
goto out;
-
-out_pernet:
- unregister_pernet_subsys(&rds_tcp_net_ops);
out_recv:
rds_tcp_recv_exit();
out_slab:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index c061d6eb465d..22571189f21e 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -142,12 +142,20 @@ int rds_tcp_accept_one(struct socket *sock)
if (ret)
goto out;
- new_sock->type = sock->type;
- new_sock->ops = sock->ops;
ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
if (ret < 0)
goto out;
+ /* sock_create_lite() does not get a hold on the owner module so we
+ * need to do it here. Note that sock_release() uses sock->ops to
+ * determine if it needs to decrement the reference count. So set
+ * sock->ops after calling accept() in case that fails. And there's
+ * no need to do try_module_get() as the listener should have a hold
+ * already.
+ */
+ new_sock->ops = sock->ops;
+ __module_get(new_sock->ops->owner);
+
ret = rds_tcp_keepalive(new_sock);
if (ret < 0)
goto out;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 083bd251406f..9ff5e0a76593 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -938,7 +938,7 @@ out_release:
}
static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
struct sock *sk = sock->sk;
@@ -964,8 +964,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
srose->srose_digis[n] = rose->source_digis[n];
}
- *uaddr_len = sizeof(struct full_sockaddr_rose);
- return 0;
+ return sizeof(struct full_sockaddr_rose);
}
int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
@@ -1568,12 +1567,12 @@ static int __init rose_proto_init(void)
rose_add_loopback_neigh();
- proc_create("rose", S_IRUGO, init_net.proc_net, &rose_info_fops);
- proc_create("rose_neigh", S_IRUGO, init_net.proc_net,
+ proc_create("rose", 0444, init_net.proc_net, &rose_info_fops);
+ proc_create("rose_neigh", 0444, init_net.proc_net,
&rose_neigh_fops);
- proc_create("rose_nodes", S_IRUGO, init_net.proc_net,
+ proc_create("rose_nodes", 0444, init_net.proc_net,
&rose_nodes_fops);
- proc_create("rose_routes", S_IRUGO, init_net.proc_net,
+ proc_create("rose_routes", 0444, init_net.proc_net,
&rose_routes_fops);
out:
return rc;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0c9c18aa7c77..9a2c8e7c000e 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -32,7 +32,7 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_RXRPC);
unsigned int rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
-module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug, rxrpc_debug, uint, 0644);
MODULE_PARM_DESC(debug, "RxRPC debugging mask");
static struct proto rxrpc_proto;
@@ -40,6 +40,7 @@ static const struct proto_ops rxrpc_rpc_ops;
/* current debugging ID */
atomic_t rxrpc_debug_id;
+EXPORT_SYMBOL(rxrpc_debug_id);
/* count of skbs currently in use */
atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
@@ -267,6 +268,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
* @upgrade: Request service upgrade for call
+ * @debug_id: The debug ID for tracing to be assigned to the call
*
* Allow a kernel service to begin a call on the nominated socket. This just
* sets up all the internal tracking structures and allocates connection and
@@ -282,7 +284,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
s64 tx_total_len,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
- bool upgrade)
+ bool upgrade,
+ unsigned int debug_id)
{
struct rxrpc_conn_parameters cp;
struct rxrpc_call_params p;
@@ -314,13 +317,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.exclusive = false;
cp.upgrade = upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp);
+ call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id);
/* The socket has been unlocked. */
if (!IS_ERR(call)) {
call->notify_rx = notify_rx;
mutex_unlock(&call->user_mutex);
}
+ rxrpc_put_peer(cp.peer);
_leave(" = %p", call);
return call;
}
@@ -444,6 +448,7 @@ int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call,
ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL);
mutex_unlock(&call->user_mutex);
+ rxrpc_put_peer(cp.peer);
_leave(" = %d", ret);
return ret;
}
@@ -759,6 +764,7 @@ static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ struct rxrpc_net *rxnet;
struct rxrpc_sock *rx;
struct sock *sk;
@@ -798,6 +804,9 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
rwlock_init(&rx->call_lock);
memset(&rx->srx, 0, sizeof(rx->srx));
+ rxnet = rxrpc_net(sock_net(&rx->sk));
+ timer_reduce(&rxnet->peer_keepalive_timer, jiffies + 1);
+
_leave(" = 0 [%p]", rx);
return 0;
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 416688381eb7..90d7079e0aa9 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -75,7 +75,9 @@ struct rxrpc_net {
u32 epoch; /* Local epoch for detecting local-end reset */
struct list_head calls; /* List of calls active in this namespace */
rwlock_t call_lock; /* Lock for ->calls */
+ atomic_t nr_calls; /* Count of allocated calls */
+ atomic_t nr_conns;
struct list_head conn_proc_list; /* List of conns in this namespace for proc */
struct list_head service_conns; /* Service conns in this namespace */
rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
@@ -97,8 +99,16 @@ struct rxrpc_net {
struct list_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
- spinlock_t peer_hash_lock; /* Lock for ->peer_hash */
DECLARE_HASHTABLE (peer_hash, 10);
+ spinlock_t peer_hash_lock; /* Lock for ->peer_hash */
+
+#define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */
+ u8 peer_keepalive_cursor;
+ ktime_t peer_keepalive_base;
+ struct hlist_head peer_keepalive[RXRPC_KEEPALIVE_TIME + 1];
+ struct hlist_head peer_keepalive_new;
+ struct timer_list peer_keepalive_timer;
+ struct work_struct peer_keepalive_work;
};
/*
@@ -285,6 +295,8 @@ struct rxrpc_peer {
struct hlist_head error_targets; /* targets for net error distribution */
struct work_struct error_distributor;
struct rb_root service_conns; /* Service connections */
+ struct hlist_node keepalive_link; /* Link in net->peer_keepalive[] */
+ time64_t last_tx_at; /* Last time packet sent here */
seqlock_t service_conn_lock;
spinlock_t lock; /* access lock */
unsigned int if_mtu; /* interface MTU for this peer */
@@ -518,6 +530,7 @@ struct rxrpc_call {
struct rxrpc_connection *conn; /* connection carrying call */
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_sock __rcu *socket; /* socket responsible */
+ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
struct mutex user_mutex; /* User access mutex */
unsigned long ack_at; /* When deferred ACK needs to happen */
unsigned long ack_lost_at; /* When ACK is figured as lost */
@@ -691,7 +704,6 @@ struct rxrpc_send_params {
* af_rxrpc.c
*/
extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
-extern atomic_t rxrpc_debug_id;
extern struct workqueue_struct *rxrpc_workqueue;
/*
@@ -732,11 +744,12 @@ extern unsigned int rxrpc_max_call_lifetime;
extern struct kmem_cache *rxrpc_call_jar;
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
-struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t);
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *,
- struct rxrpc_call_params *, gfp_t);
+ struct rxrpc_call_params *, gfp_t,
+ unsigned int);
int rxrpc_retry_client_call(struct rxrpc_sock *,
struct rxrpc_call *,
struct rxrpc_conn_parameters *,
@@ -778,6 +791,7 @@ static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call,
call->error = error;
call->completion = compl,
call->state = RXRPC_CALL_COMPLETE;
+ trace_rxrpc_call_complete(call);
wake_up(&call->waitq);
return true;
}
@@ -822,7 +836,7 @@ static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
rxrpc_seq_t seq,
u32 abort_code, int error)
{
- trace_rxrpc_abort(why, call->cid, call->call_id, seq,
+ trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
abort_code, error);
return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
abort_code, error);
@@ -968,31 +982,12 @@ extern void rxrpc_process_local_events(struct rxrpc_local *);
* local_object.c
*/
struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
-void __rxrpc_put_local(struct rxrpc_local *);
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
+void rxrpc_put_local(struct rxrpc_local *);
+void rxrpc_queue_local(struct rxrpc_local *);
void rxrpc_destroy_all_locals(struct rxrpc_net *);
-static inline void rxrpc_get_local(struct rxrpc_local *local)
-{
- atomic_inc(&local->usage);
-}
-
-static inline
-struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
-{
- return atomic_inc_not_zero(&local->usage) ? local : NULL;
-}
-
-static inline void rxrpc_put_local(struct rxrpc_local *local)
-{
- if (local && atomic_dec_and_test(&local->usage))
- __rxrpc_put_local(local);
-}
-
-static inline void rxrpc_queue_local(struct rxrpc_local *local)
-{
- rxrpc_queue_work(&local->processor);
-}
-
/*
* misc.c
*/
@@ -1025,6 +1020,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *);
int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
void rxrpc_reject_packets(struct rxrpc_local *);
+void rxrpc_send_keepalive(struct rxrpc_peer *);
/*
* peer_event.c
@@ -1033,6 +1029,7 @@ void rxrpc_error_report(struct sock *);
void rxrpc_peer_error_distributor(struct work_struct *);
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
+void rxrpc_peer_keepalive_worker(struct work_struct *);
/*
* peer_object.c
@@ -1044,25 +1041,11 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *,
struct rxrpc_peer *);
-
-static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
-{
- atomic_inc(&peer->usage);
- return peer;
-}
-
-static inline
-struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
-{
- return atomic_inc_not_zero(&peer->usage) ? peer : NULL;
-}
-
-extern void __rxrpc_put_peer(struct rxrpc_peer *peer);
-static inline void rxrpc_put_peer(struct rxrpc_peer *peer)
-{
- if (peer && atomic_dec_and_test(&peer->usage))
- __rxrpc_put_peer(peer);
-}
+void rxrpc_destroy_all_peers(struct rxrpc_net *);
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
+void rxrpc_put_peer(struct rxrpc_peer *);
+void __rxrpc_queue_peer_error(struct rxrpc_peer *);
/*
* proc.c
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 3028298ca561..a9a9be5519b9 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -34,7 +34,8 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
struct rxrpc_backlog *b,
rxrpc_notify_rx_t notify_rx,
rxrpc_user_attach_call_t user_attach_call,
- unsigned long user_call_ID, gfp_t gfp)
+ unsigned long user_call_ID, gfp_t gfp,
+ unsigned int debug_id)
{
const void *here = __builtin_return_address(0);
struct rxrpc_call *call;
@@ -94,7 +95,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
/* Now it gets complicated, because calls get registered with the
* socket here, particularly if a user ID is preassigned by the user.
*/
- call = rxrpc_alloc_call(rx, gfp);
+ call = rxrpc_alloc_call(rx, gfp, debug_id);
if (!call)
return -ENOMEM;
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
@@ -137,6 +138,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
+ rxnet = call->rxnet;
write_lock(&rxnet->call_lock);
list_add_tail(&call->link, &rxnet->calls);
write_unlock(&rxnet->call_lock);
@@ -174,7 +176,8 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp)
if (rx->discard_new_call)
return 0;
- while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0)
+ while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp,
+ atomic_inc_return(&rxrpc_debug_id)) == 0)
;
return 0;
@@ -216,6 +219,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
list_del(&conn->proc_link);
write_unlock(&rxnet->conn_lock);
kfree(conn);
+ if (atomic_dec_and_test(&rxnet->nr_conns))
+ wake_up_var(&rxnet->nr_conns);
tail = (tail + 1) & (size - 1);
}
@@ -223,7 +228,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->call_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_call *call = b->call_backlog[tail];
- call->socket = rx;
+ rcu_assign_pointer(call->socket, rx);
if (rx->discard_new_call) {
_debug("discard %lx", call->user_call_ID);
rx->discard_new_call(call, call->user_call_ID);
@@ -293,8 +298,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
b->conn_backlog[conn_tail] = NULL;
smp_store_release(&b->conn_backlog_tail,
(conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
- rxrpc_get_local(local);
- conn->params.local = local;
+ conn->params.local = rxrpc_get_local(local);
conn->params.peer = peer;
rxrpc_see_connection(conn);
rxrpc_new_incoming_connection(rx, conn, skb);
@@ -347,7 +351,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
service_id == rx->second_service))
goto found_service;
- trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RX_INVALID_OPERATION, EOPNOTSUPP);
skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
skb->priority = RX_INVALID_OPERATION;
@@ -358,7 +362,7 @@ found_service:
spin_lock(&rx->incoming_lock);
if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
rx->sk.sk_state == RXRPC_CLOSE) {
- trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber,
+ trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber,
sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
skb->priority = RX_INVALID_OPERATION;
@@ -454,6 +458,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
unsigned long user_call_ID,
rxrpc_notify_rx_t notify_rx)
__releases(&rx->sk.sk_lock.slock)
+ __acquires(call->user_mutex)
{
struct rxrpc_call *call;
struct rb_node *parent, **pp;
@@ -635,6 +640,7 @@ out_discard:
* @user_attach_call: Func to attach call to user_call_ID
* @user_call_ID: The tag to attach to the preallocated call
* @gfp: The allocation conditions.
+ * @debug_id: The tracing debug ID.
*
* Charge up the socket with preallocated calls, each with a user ID. A
* function should be provided to effect the attachment from the user's side.
@@ -645,7 +651,8 @@ out_discard:
int rxrpc_kernel_charge_accept(struct socket *sock,
rxrpc_notify_rx_t notify_rx,
rxrpc_user_attach_call_t user_attach_call,
- unsigned long user_call_ID, gfp_t gfp)
+ unsigned long user_call_ID, gfp_t gfp,
+ unsigned int debug_id)
{
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
struct rxrpc_backlog *b = rx->backlog;
@@ -655,6 +662,6 @@ int rxrpc_kernel_charge_accept(struct socket *sock,
return rxrpc_service_prealloc_one(rx, b, notify_rx,
user_attach_call, user_call_ID,
- gfp);
+ gfp, debug_id);
}
EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index ad2ab1103189..6e0d788b4dc4 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -195,6 +195,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
* the packets in the Tx buffer we're going to resend and what the new
* resend timeout will be.
*/
+ trace_rxrpc_resend(call, (cursor + 1) & RXRPC_RXTX_BUFF_MASK);
oldest = now;
for (seq = cursor + 1; before_eq(seq, top); seq++) {
ix = seq & RXRPC_RXTX_BUFF_MASK;
@@ -225,7 +226,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
}
- resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now)));
+ resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
resend_at += jiffies + rxrpc_resend_timeout;
WRITE_ONCE(call->resend_at, resend_at);
@@ -237,7 +238,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
* retransmitting data.
*/
if (!retrans) {
- rxrpc_reduce_call_timer(call, resend_at, now,
+ rxrpc_reduce_call_timer(call, resend_at, now_j,
rxrpc_timer_set_for_resend);
spin_unlock_bh(&call->lock);
ack_ts = ktime_sub(now, call->acks_latest_ts);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 0b2db38dd32d..f6734d8cb01a 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -99,9 +99,11 @@ found_extant_call:
/*
* allocate a new call
*/
-struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
+ unsigned int debug_id)
{
struct rxrpc_call *call;
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
call = kmem_cache_zalloc(rxrpc_call_jar, gfp);
if (!call)
@@ -138,7 +140,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
spin_lock_init(&call->notify_lock);
rwlock_init(&call->state_lock);
atomic_set(&call->usage, 1);
- call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ call->debug_id = debug_id;
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
call->next_req_timo = 1 * HZ;
@@ -152,6 +154,9 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
call->cong_cwnd = 2;
call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
+
+ call->rxnet = rxnet;
+ atomic_inc(&rxnet->nr_calls);
return call;
nomem_2:
@@ -166,14 +171,15 @@ nomem:
*/
static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
struct sockaddr_rxrpc *srx,
- gfp_t gfp)
+ gfp_t gfp,
+ unsigned int debug_id)
{
struct rxrpc_call *call;
ktime_t now;
_enter("");
- call = rxrpc_alloc_call(rx, gfp);
+ call = rxrpc_alloc_call(rx, gfp, debug_id);
if (!call)
return ERR_PTR(-ENOMEM);
call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
@@ -214,18 +220,20 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_conn_parameters *cp,
struct sockaddr_rxrpc *srx,
struct rxrpc_call_params *p,
- gfp_t gfp)
+ gfp_t gfp,
+ unsigned int debug_id)
__releases(&rx->sk.sk_lock.slock)
+ __acquires(&call->user_mutex)
{
struct rxrpc_call *call, *xcall;
- struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
+ struct rxrpc_net *rxnet;
struct rb_node *parent, **pp;
const void *here = __builtin_return_address(0);
int ret;
_enter("%p,%lx", rx, p->user_call_ID);
- call = rxrpc_alloc_client_call(rx, srx, gfp);
+ call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
if (IS_ERR(call)) {
release_sock(&rx->sk);
_leave(" = %ld", PTR_ERR(call));
@@ -268,6 +276,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
+ rxnet = call->rxnet;
write_lock(&rxnet->call_lock);
list_add_tail(&call->link, &rxnet->calls);
write_unlock(&rxnet->call_lock);
@@ -613,7 +622,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
*/
void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
- struct rxrpc_net *rxnet;
+ struct rxrpc_net *rxnet = call->rxnet;
const void *here = __builtin_return_address(0);
int n;
@@ -627,7 +636,6 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
if (!list_empty(&call->link)) {
- rxnet = rxrpc_net(sock_net(&call->socket->sk));
write_lock(&rxnet->call_lock);
list_del_init(&call->link);
write_unlock(&rxnet->call_lock);
@@ -643,11 +651,14 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
{
struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+ struct rxrpc_net *rxnet = call->rxnet;
rxrpc_put_peer(call->peer);
kfree(call->rxtx_buffer);
kfree(call->rxtx_annotations);
kmem_cache_free(rxrpc_call_jar, call);
+ if (atomic_dec_and_test(&rxnet->nr_calls))
+ wake_up_var(&rxnet->nr_calls);
}
/*
@@ -712,4 +723,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
}
write_unlock(&rxnet->call_lock);
+
+ atomic_dec(&rxnet->nr_calls);
+ wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls));
}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 064175068059..5736f643c516 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -207,6 +207,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
if (ret < 0)
goto error_2;
+ atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
@@ -776,7 +777,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
unsigned int channel = call->cid & RXRPC_CHANNELMASK;
struct rxrpc_connection *conn = call->conn;
struct rxrpc_channel *chan = &conn->channels[channel];
- struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk));
+ struct rxrpc_net *rxnet = conn->params.local->rxnet;
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
call->conn = NULL;
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index b1dfae107431..c717152070df 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -136,6 +136,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
}
kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
+ conn->params.peer->last_tx_at = ktime_get_real();
_leave("");
return;
}
@@ -160,7 +161,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
lockdep_is_held(&conn->channel_lock));
if (call) {
if (compl == RXRPC_CALL_LOCALLY_ABORTED)
- trace_rxrpc_abort("CON", call->cid,
+ trace_rxrpc_abort(call->debug_id,
+ "CON", call->cid,
call->call_id, 0,
abort_code, error);
if (rxrpc_set_call_completion(call, compl,
@@ -238,6 +240,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
return -EAGAIN;
}
+ conn->params.peer->last_tx_at = ktime_get_real();
+
_leave(" = 0");
return 0;
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index ccbac190add1..4c77a78a252a 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -365,6 +365,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
key_put(conn->params.key);
key_put(conn->server_key);
rxrpc_put_peer(conn->params.peer);
+
+ if (atomic_dec_and_test(&conn->params.local->rxnet->nr_conns))
+ wake_up_var(&conn->params.local->rxnet->nr_conns);
rxrpc_put_local(conn->params.local);
kfree(conn);
@@ -418,7 +421,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
*/
if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
continue;
- trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0);
+ trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, NULL);
if (rxrpc_conn_is_client(conn))
BUG();
@@ -458,6 +461,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
_enter("");
+ atomic_dec(&rxnet->nr_conns);
rxrpc_destroy_all_client_connections(rxnet);
del_timer_sync(&rxnet->service_conn_reap_timer);
@@ -475,5 +479,9 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
ASSERT(list_empty(&rxnet->conn_proc_list));
+ /* We need to wait for the connections to be destroyed by RCU as they
+ * pin things that we still need to get rid of.
+ */
+ wait_var_event(&rxnet->nr_conns, !atomic_read(&rxnet->nr_conns));
_leave("");
}
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index f6fcdb3130a1..80773a50c755 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -132,6 +132,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
atomic_set(&conn->usage, 2);
+ atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
list_add_tail(&conn->link, &rxnet->service_conns);
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 6fc61400337f..0410d2277ca2 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1183,6 +1183,8 @@ void rxrpc_data_ready(struct sock *udp_sk)
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_VERSION:
+ if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED))
+ goto discard;
rxrpc_post_packet_to_local(local, skb);
goto out;
@@ -1198,6 +1200,12 @@ void rxrpc_data_ready(struct sock *udp_sk)
!rxrpc_validate_jumbo(skb))
goto bad_message;
break;
+
+ /* Packet types 9-11 should just be ignored. */
+ case RXRPC_PACKET_TYPE_PARAMS:
+ case RXRPC_PACKET_TYPE_10:
+ case RXRPC_PACKET_TYPE_11:
+ goto discard;
}
rcu_read_lock();
@@ -1240,16 +1248,19 @@ void rxrpc_data_ready(struct sock *udp_sk)
goto discard_unlock;
if (sp->hdr.callNumber == chan->last_call) {
- /* For the previous service call, if completed successfully, we
- * discard all further packets.
+ if (chan->call ||
+ sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
+ goto discard_unlock;
+
+ /* For the previous service call, if completed
+ * successfully, we discard all further packets.
*/
if (rxrpc_conn_is_service(conn) &&
- (chan->last_type == RXRPC_PACKET_TYPE_ACK ||
- sp->hdr.type == RXRPC_PACKET_TYPE_ABORT))
+ chan->last_type == RXRPC_PACKET_TYPE_ACK)
goto discard_unlock;
- /* But otherwise we need to retransmit the final packet from
- * data cached in the connection record.
+ /* But otherwise we need to retransmit the final packet
+ * from data cached in the connection record.
*/
rxrpc_post_packet_to_conn(conn, skb);
goto out_unlock;
@@ -1307,21 +1318,21 @@ out_unlock:
wrong_security:
rcu_read_unlock();
- trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RXKADINCONSISTENCY, EBADMSG);
skb->priority = RXKADINCONSISTENCY;
goto post_abort;
reupgrade:
rcu_read_unlock();
- trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RX_PROTOCOL_ERROR, EBADMSG);
goto protocol_error;
bad_message_unlock:
rcu_read_unlock();
bad_message:
- trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RX_PROTOCOL_ERROR, EBADMSG);
protocol_error:
skb->priority = RX_PROTOCOL_ERROR;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 38b99db30e54..8b54e9531d52 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -95,6 +95,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
local->debug_id = atomic_inc_return(&rxrpc_debug_id);
memcpy(&local->srx, srx, sizeof(*srx));
local->srx.srx_service = 0;
+ trace_rxrpc_local(local, rxrpc_local_new, 1, NULL);
}
_leave(" = %p", local);
@@ -257,15 +258,74 @@ addr_in_use:
}
/*
+ * Get a ref on a local endpoint.
+ */
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ n = atomic_inc_return(&local->usage);
+ trace_rxrpc_local(local, rxrpc_local_got, n, here);
+ return local;
+}
+
+/*
+ * Get a ref on a local endpoint unless its usage has already reached 0.
+ */
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
+{
+ const void *here = __builtin_return_address(0);
+
+ if (local) {
+ int n = __atomic_add_unless(&local->usage, 1, 0);
+ if (n > 0)
+ trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
+ else
+ local = NULL;
+ }
+ return local;
+}
+
+/*
+ * Queue a local endpoint.
+ */
+void rxrpc_queue_local(struct rxrpc_local *local)
+{
+ const void *here = __builtin_return_address(0);
+
+ if (rxrpc_queue_work(&local->processor))
+ trace_rxrpc_local(local, rxrpc_local_queued,
+ atomic_read(&local->usage), here);
+}
+
+/*
* A local endpoint reached its end of life.
*/
-void __rxrpc_put_local(struct rxrpc_local *local)
+static void __rxrpc_put_local(struct rxrpc_local *local)
{
_enter("%d", local->debug_id);
rxrpc_queue_work(&local->processor);
}
/*
+ * Drop a ref on a local endpoint.
+ */
+void rxrpc_put_local(struct rxrpc_local *local)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ if (local) {
+ n = atomic_dec_return(&local->usage);
+ trace_rxrpc_local(local, rxrpc_local_put, n, here);
+
+ if (n == 0)
+ __rxrpc_put_local(local);
+ }
+}
+
+/*
* Destroy a local endpoint's socket and then hand the record to RCU to dispose
* of.
*
@@ -322,7 +382,8 @@ static void rxrpc_local_processor(struct work_struct *work)
container_of(work, struct rxrpc_local, processor);
bool again;
- _enter("%d", local->debug_id);
+ trace_rxrpc_local(local, rxrpc_local_processing,
+ atomic_read(&local->usage), NULL);
do {
again = false;
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index f18c9248e0d4..c7a023fb22d0 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -32,13 +32,22 @@ static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
rxrpc_queue_work(&rxnet->service_conn_reaper);
}
+static void rxrpc_peer_keepalive_timeout(struct timer_list *timer)
+{
+ struct rxrpc_net *rxnet =
+ container_of(timer, struct rxrpc_net, peer_keepalive_timer);
+
+ if (rxnet->live)
+ rxrpc_queue_work(&rxnet->peer_keepalive_work);
+}
+
/*
* Initialise a per-network namespace record.
*/
static __net_init int rxrpc_init_net(struct net *net)
{
struct rxrpc_net *rxnet = rxrpc_net(net);
- int ret;
+ int ret, i;
rxnet->live = true;
get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
@@ -46,7 +55,9 @@ static __net_init int rxrpc_init_net(struct net *net)
INIT_LIST_HEAD(&rxnet->calls);
rwlock_init(&rxnet->call_lock);
+ atomic_set(&rxnet->nr_calls, 1);
+ atomic_set(&rxnet->nr_conns, 1);
INIT_LIST_HEAD(&rxnet->conn_proc_list);
INIT_LIST_HEAD(&rxnet->service_conns);
rwlock_init(&rxnet->conn_lock);
@@ -70,8 +81,16 @@ static __net_init int rxrpc_init_net(struct net *net)
INIT_LIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
+
hash_init(rxnet->peer_hash);
spin_lock_init(&rxnet->peer_hash_lock);
+ for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++)
+ INIT_HLIST_HEAD(&rxnet->peer_keepalive[i]);
+ INIT_HLIST_HEAD(&rxnet->peer_keepalive_new);
+ timer_setup(&rxnet->peer_keepalive_timer,
+ rxrpc_peer_keepalive_timeout, 0);
+ INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker);
+ rxnet->peer_keepalive_base = ktime_add(ktime_get_real(), NSEC_PER_SEC);
ret = -ENOMEM;
rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
@@ -95,8 +114,11 @@ static __net_exit void rxrpc_exit_net(struct net *net)
struct rxrpc_net *rxnet = rxrpc_net(net);
rxnet->live = false;
+ del_timer_sync(&rxnet->peer_keepalive_timer);
+ cancel_work_sync(&rxnet->peer_keepalive_work);
rxrpc_destroy_all_calls(rxnet);
rxrpc_destroy_all_connections(rxnet);
+ rxrpc_destroy_all_peers(rxnet);
rxrpc_destroy_all_locals(rxnet);
proc_remove(rxnet->proc_net);
}
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index cf73dc006c3b..7f1fc04775b3 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -32,6 +32,8 @@ struct rxrpc_abort_buffer {
__be32 abort_code;
};
+static const char rxrpc_keepalive_string[] = "";
+
/*
* Arrange for a keepalive ping a certain time after we last transmitted. This
* lets the far side know we're still interested in this call and helps keep
@@ -122,6 +124,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
struct kvec iov[2];
rxrpc_serial_t serial;
rxrpc_seq_t hard_ack, top;
+ ktime_t now;
size_t len, n;
int ret;
u8 reason;
@@ -203,8 +206,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
}
ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+ now = ktime_get_real();
if (ping)
- call->ping_time = ktime_get_real();
+ call->ping_time = now;
+ conn->params.peer->last_tx_at = ktime_get_real();
if (call->state < RXRPC_CALL_COMPLETE) {
if (ret < 0) {
@@ -288,6 +293,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
ret = kernel_sendmsg(conn->params.local->socket,
&msg, iov, 1, sizeof(pkt));
+ conn->params.peer->last_tx_at = ktime_get_real();
rxrpc_put_connection(conn);
return ret;
@@ -378,6 +384,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
* message and update the peer record
*/
ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+ conn->params.peer->last_tx_at = ktime_get_real();
up_read(&conn->params.local->defrag_sem);
if (ret == -EMSGSIZE)
@@ -429,6 +436,7 @@ send_fragmentable:
if (ret == 0) {
ret = kernel_sendmsg(conn->params.local->socket, &msg,
iov, 2, len);
+ conn->params.peer->last_tx_at = ktime_get_real();
opt = IP_PMTUDISC_DO;
kernel_setsockopt(conn->params.local->socket, SOL_IP,
@@ -446,6 +454,7 @@ send_fragmentable:
if (ret == 0) {
ret = kernel_sendmsg(conn->params.local->socket, &msg,
iov, 2, len);
+ conn->params.peer->last_tx_at = ktime_get_real();
opt = IPV6_PMTUDISC_DO;
kernel_setsockopt(conn->params.local->socket,
@@ -515,3 +524,51 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
_leave("");
}
+
+/*
+ * Send a VERSION reply to a peer as a keepalive.
+ */
+void rxrpc_send_keepalive(struct rxrpc_peer *peer)
+{
+ struct rxrpc_wire_header whdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t len;
+ int ret;
+
+ _enter("");
+
+ msg.msg_name = &peer->srx.transport;
+ msg.msg_namelen = peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(peer->local->rxnet->epoch);
+ whdr.cid = 0;
+ whdr.callNumber = 0;
+ whdr.seq = 0;
+ whdr.serial = 0;
+ whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */
+ whdr.flags = RXRPC_LAST_PACKET;
+ whdr.userStatus = 0;
+ whdr.securityIndex = 0;
+ whdr._rsvd = 0;
+ whdr.serviceId = 0;
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = (char *)rxrpc_keepalive_string;
+ iov[1].iov_len = sizeof(rxrpc_keepalive_string);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ _proto("Tx VERSION (keepalive)");
+
+ ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len);
+ if (ret < 0)
+ _debug("sendmsg failed: %d", ret);
+
+ peer->last_tx_at = ktime_get_real();
+ _leave("");
+}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 7f749505e699..78c2f95d1f22 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -192,7 +192,7 @@ void rxrpc_error_report(struct sock *sk)
rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
/* The ref we obtained is passed off to the work item */
- rxrpc_queue_work(&peer->error_distributor);
+ __rxrpc_queue_peer_error(peer);
_leave("");
}
@@ -348,3 +348,99 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
usage, avg);
}
+
+/*
+ * Perform keep-alive pings with VERSION packets to keep any NAT alive.
+ */
+void rxrpc_peer_keepalive_worker(struct work_struct *work)
+{
+ struct rxrpc_net *rxnet =
+ container_of(work, struct rxrpc_net, peer_keepalive_work);
+ struct rxrpc_peer *peer;
+ unsigned long delay;
+ ktime_t base, now = ktime_get_real();
+ s64 diff;
+ u8 cursor, slot;
+
+ base = rxnet->peer_keepalive_base;
+ cursor = rxnet->peer_keepalive_cursor;
+
+ _enter("%u,%lld", cursor, ktime_sub(now, base));
+
+next_bucket:
+ diff = ktime_to_ns(ktime_sub(now, base));
+ if (diff < 0)
+ goto resched;
+
+ _debug("at %u", cursor);
+ spin_lock_bh(&rxnet->peer_hash_lock);
+next_peer:
+ if (!rxnet->live) {
+ spin_unlock_bh(&rxnet->peer_hash_lock);
+ goto out;
+ }
+
+ /* Everything in the bucket at the cursor is processed this second; the
+ * bucket at cursor + 1 goes now + 1s and so on...
+ */
+ if (hlist_empty(&rxnet->peer_keepalive[cursor])) {
+ if (hlist_empty(&rxnet->peer_keepalive_new)) {
+ spin_unlock_bh(&rxnet->peer_hash_lock);
+ goto emptied_bucket;
+ }
+
+ hlist_move_list(&rxnet->peer_keepalive_new,
+ &rxnet->peer_keepalive[cursor]);
+ }
+
+ peer = hlist_entry(rxnet->peer_keepalive[cursor].first,
+ struct rxrpc_peer, keepalive_link);
+ hlist_del_init(&peer->keepalive_link);
+ if (!rxrpc_get_peer_maybe(peer))
+ goto next_peer;
+
+ spin_unlock_bh(&rxnet->peer_hash_lock);
+
+ _debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport);
+
+recalc:
+ diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC);
+ if (diff < -30 || diff > 30)
+ goto send; /* LSW of 64-bit time probably wrapped on 32-bit */
+ diff += RXRPC_KEEPALIVE_TIME - 1;
+ if (diff < 0)
+ goto send;
+
+ slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff;
+ if (slot == 0)
+ goto send;
+
+ /* A transmission to this peer occurred since last we examined it so
+ * put it into the appropriate future bucket.
+ */
+ slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive);
+ spin_lock_bh(&rxnet->peer_hash_lock);
+ hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]);
+ rxrpc_put_peer(peer);
+ goto next_peer;
+
+send:
+ rxrpc_send_keepalive(peer);
+ now = ktime_get_real();
+ goto recalc;
+
+emptied_bucket:
+ cursor++;
+ if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive))
+ cursor = 0;
+ base = ktime_add_ns(base, NSEC_PER_SEC);
+ goto next_bucket;
+
+resched:
+ rxnet->peer_keepalive_base = base;
+ rxnet->peer_keepalive_cursor = cursor;
+ delay = nsecs_to_jiffies(-diff) + 1;
+ timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
+out:
+ _leave("");
+}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index d02a99f37f5f..1b7e8107b3ae 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -322,6 +322,7 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
if (!peer) {
peer = prealloc;
hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
+ hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive_new);
}
spin_unlock(&rxnet->peer_hash_lock);
@@ -363,9 +364,12 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
- if (!peer)
+ if (!peer) {
hash_add_rcu(rxnet->peer_hash,
&candidate->hash_link, hash_key);
+ hlist_add_head(&candidate->keepalive_link,
+ &rxnet->peer_keepalive_new);
+ }
spin_unlock_bh(&rxnet->peer_hash_lock);
@@ -382,9 +386,54 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
}
/*
- * Discard a ref on a remote peer record.
+ * Get a ref on a peer record.
+ */
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ n = atomic_inc_return(&peer->usage);
+ trace_rxrpc_peer(peer, rxrpc_peer_got, n, here);
+ return peer;
+}
+
+/*
+ * Get a ref on a peer record unless its usage has already reached 0.
+ */
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
+{
+ const void *here = __builtin_return_address(0);
+
+ if (peer) {
+ int n = __atomic_add_unless(&peer->usage, 1, 0);
+ if (n > 0)
+ trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
+ else
+ peer = NULL;
+ }
+ return peer;
+}
+
+/*
+ * Queue a peer record. This passes the caller's ref to the workqueue.
+ */
+void __rxrpc_queue_peer_error(struct rxrpc_peer *peer)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ n = atomic_read(&peer->usage);
+ if (rxrpc_queue_work(&peer->error_distributor))
+ trace_rxrpc_peer(peer, rxrpc_peer_queued_error, n, here);
+ else
+ rxrpc_put_peer(peer);
+}
+
+/*
+ * Discard a peer record.
*/
-void __rxrpc_put_peer(struct rxrpc_peer *peer)
+static void __rxrpc_put_peer(struct rxrpc_peer *peer)
{
struct rxrpc_net *rxnet = peer->local->rxnet;
@@ -392,11 +441,49 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer)
spin_lock_bh(&rxnet->peer_hash_lock);
hash_del_rcu(&peer->hash_link);
+ hlist_del_init(&peer->keepalive_link);
spin_unlock_bh(&rxnet->peer_hash_lock);
kfree_rcu(peer, rcu);
}
+/*
+ * Drop a ref on a peer record.
+ */
+void rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ if (peer) {
+ n = atomic_dec_return(&peer->usage);
+ trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+ if (n == 0)
+ __rxrpc_put_peer(peer);
+ }
+}
+
+/*
+ * Make sure all peer records have been discarded.
+ */
+void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
+{
+ struct rxrpc_peer *peer;
+ int i;
+
+ for (i = 0; i < HASH_SIZE(rxnet->peer_hash); i++) {
+ if (hlist_empty(&rxnet->peer_hash[i]))
+ continue;
+
+ hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) {
+ pr_err("Leaked peer %u {%u} %pISp\n",
+ peer->debug_id,
+ atomic_read(&peer->usage),
+ &peer->srx.transport);
+ }
+ }
+}
+
/**
* rxrpc_kernel_get_peer - Get the peer address of a call
* @sock: The socket on which the call is in progress.
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index f79f260c6ddc..7e45db058823 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -29,6 +29,8 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
* generate a list of extant and dead calls in /proc/net/rxrpc_calls
*/
static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rcu)
+ __acquires(rxnet->call_lock)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
@@ -45,6 +47,8 @@ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
+ __releases(rxnet->call_lock)
+ __releases(rcu)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
@@ -135,6 +139,7 @@ const struct file_operations rxrpc_call_seq_fops = {
* generate a list of extant virtual connections in /proc/net/rxrpc_conns
*/
static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rxnet->conn_lock)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
@@ -151,6 +156,7 @@ static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
}
static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
+ __releases(rxnet->conn_lock)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index 4bddcf3face3..93da73bf7098 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -46,6 +46,9 @@ struct rxrpc_wire_header {
#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */
#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection secutity response (CLNT->SRVR) */
#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */
+#define RXRPC_PACKET_TYPE_PARAMS 9 /* Parameter negotiation (unspec'd, ignore) */
+#define RXRPC_PACKET_TYPE_10 10 /* Ignored */
+#define RXRPC_PACKET_TYPE_11 11 /* Ignored */
#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */
#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */
@@ -78,6 +81,9 @@ struct rxrpc_wire_header {
(1 << RXRPC_PACKET_TYPE_CHALLENGE) | \
(1 << RXRPC_PACKET_TYPE_RESPONSE) | \
/*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \
+ (1 << RXRPC_PACKET_TYPE_PARAMS) | \
+ (1 << RXRPC_PACKET_TYPE_10) | \
+ (1 << RXRPC_PACKET_TYPE_11) | \
(1 << RXRPC_PACKET_TYPE_VERSION))
/*****************************************************************************/
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 9d45d8b56744..7bff716e911e 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
unsigned int *_offset, unsigned int *_len)
{
unsigned int offset = sizeof(struct rxrpc_wire_header);
- unsigned int len = *_len;
+ unsigned int len;
int ret;
u8 annotation = *_annotation;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 77cb23c7bd0a..588fea0dd362 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -668,6 +668,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
return -EAGAIN;
}
+ conn->params.peer->last_tx_at = ktime_get_real();
_leave(" = 0");
return 0;
}
@@ -722,6 +723,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
return -EAGAIN;
}
+ conn->params.peer->last_tx_at = ktime_get_real();
_leave(" = 0");
return 0;
}
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index e9f428351293..c4479afe8ae7 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -19,9 +19,6 @@
#include <keys/rxrpc-type.h>
#include "ar-internal.h"
-static LIST_HEAD(rxrpc_security_methods);
-static DECLARE_RWSEM(rxrpc_security_sem);
-
static const struct rxrpc_security *rxrpc_security_types[] = {
[RXRPC_SECURITY_NONE] = &rxrpc_no_security,
#ifdef CONFIG_RXKAD
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 09f2a3e05221..206e802ccbdc 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -130,7 +130,9 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
spin_lock_bh(&call->lock);
if (call->state < RXRPC_CALL_COMPLETE) {
- call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
+ call->rxtx_annotations[ix] =
+ (call->rxtx_annotations[ix] & RXRPC_TX_ANNO_LAST) |
+ RXRPC_TX_ANNO_RETRANS;
if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
rxrpc_queue_call(call);
}
@@ -554,6 +556,7 @@ static struct rxrpc_call *
rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
struct rxrpc_send_params *p)
__releases(&rx->sk.sk_lock.slock)
+ __acquires(&call->user_mutex)
{
struct rxrpc_conn_parameters cp;
struct rxrpc_call *call;
@@ -579,9 +582,11 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
cp.exclusive = rx->exclusive | p->exclusive;
cp.upgrade = p->upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL);
+ call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL,
+ atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
+ rxrpc_put_peer(cp.peer);
_leave(" = %p\n", call);
return call;
}
@@ -593,6 +598,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
*/
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
+ __releases(&call->user_mutex)
{
enum rxrpc_call_state state;
struct rxrpc_call *call;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f24a6ae6819a..a01169fb5325 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -658,6 +658,18 @@ config NET_EMATCH_IPSET
To compile this code as a module, choose M here: the
module will be called em_ipset.
+config NET_EMATCH_IPT
+ tristate "IPtables Matches"
+ depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
+ ---help---
+ Say Y here to be able to classify packets based on iptables
+ matches.
+ Current supported match is "policy" which allows packet classification
+ based on IPsec policy that was used during decapsulation
+
+ To compile this code as a module, choose M here: the
+ module will be called em_ipt.
+
config NET_CLS_ACT
bool "Actions"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 5b635447e3f8..8811d3804878 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -75,3 +75,4 @@ obj-$(CONFIG_NET_EMATCH_META) += em_meta.o
obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o
obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o
obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o
+obj-$(CONFIG_NET_EMATCH_IPT) += em_ipt.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index eba6682727dd..72251241665a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
}
EXPORT_SYMBOL(__tcf_idr_release);
+static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+{
+ u32 cookie_len = 0;
+
+ if (act->act_cookie)
+ cookie_len = nla_total_size(act->act_cookie->len);
+
+ return nla_total_size(0) /* action number nested */
+ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+ + cookie_len /* TCA_ACT_COOKIE */
+ + nla_total_size(0) /* TCA_ACT_STATS nested */
+ /* TCA_STATS_BASIC */
+ + nla_total_size_64bit(sizeof(struct gnet_stats_basic))
+ /* TCA_STATS_QUEUE */
+ + nla_total_size_64bit(sizeof(struct gnet_stats_queue))
+ + nla_total_size(0) /* TCA_OPTIONS nested */
+ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
+}
+
+static size_t tcf_action_full_attrs_size(size_t sz)
+{
+ return NLMSG_HDRLEN /* struct nlmsghdr */
+ + sizeof(struct tcamsg)
+ + nla_total_size(0) /* TCA_ACT_TAB nested */
+ + sz;
+}
+
+static size_t tcf_action_fill_size(const struct tc_action *act)
+{
+ size_t sz = tcf_action_shared_attrs_size(act);
+
+ if (act->ops->get_fill_size)
+ return act->ops->get_fill_size(act) + sz;
+ return sz;
+}
+
static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -135,8 +171,10 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
continue;
nest = nla_nest_start(skb, n_i);
- if (!nest)
+ if (!nest) {
+ index--;
goto nla_put_failure;
+ }
err = tcf_action_dump_1(skb, p, 0, 0);
if (err < 0) {
index--;
@@ -202,7 +240,8 @@ nla_put_failure:
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -211,7 +250,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(idrinfo, skb, cb);
} else {
- WARN(1, "tcf_generic_walker: unknown action %d\n", type);
+ WARN(1, "tcf_generic_walker: unknown command %d\n", type);
+ NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
return -EINVAL;
}
}
@@ -258,14 +298,6 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
}
EXPORT_SYMBOL(tcf_idr_check);
-void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
-{
- if (est)
- gen_kill_estimator(&a->tcfa_rate_est);
- free_tcf(a);
-}
-EXPORT_SYMBOL(tcf_idr_cleanup);
-
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats)
@@ -605,7 +637,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind)
+ char *name, int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action *a;
struct tc_action_ops *a_o;
@@ -616,31 +649,40 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
int err;
if (name == NULL) {
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
- if (kind == NULL)
+ if (!kind) {
+ NL_SET_ERR_MSG(extack, "TC action kind must be specified");
goto err_out;
- if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+ }
+ if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
goto err_out;
+ }
if (tb[TCA_ACT_COOKIE]) {
int cklen = nla_len(tb[TCA_ACT_COOKIE]);
- if (cklen > TC_COOKIE_MAX_SIZE)
+ if (cklen > TC_COOKIE_MAX_SIZE) {
+ NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
goto err_out;
+ }
cookie = nla_memdup_cookie(tb);
if (!cookie) {
+ NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
err = -ENOMEM;
goto err_out;
}
}
} else {
- err = -EINVAL;
- if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+ if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
+ err = -EINVAL;
goto err_out;
+ }
}
a_o = tc_lookup_action_n(act_name);
@@ -663,15 +705,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
goto err_mod;
}
#endif
+ NL_SET_ERR_MSG(extack, "Failed to load TC action module");
err = -ENOENT;
goto err_out;
}
/* backward compatibility for policer */
if (name == NULL)
- err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
+ err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
+ extack);
else
- err = a_o->init(net, nla, est, &a, ovr, bind);
+ err = a_o->init(net, nla, est, &a, ovr, bind, extack);
if (err < 0)
goto err_mod;
@@ -697,6 +741,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
list_add_tail(&a->list, &actions);
tcf_action_destroy(&actions, bind);
+ NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
return ERR_PTR(err);
}
}
@@ -726,29 +771,35 @@ static void cleanup_a(struct list_head *actions, int ovr)
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions)
+ struct list_head *actions, size_t *attr_size,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
+ size_t sz = 0;
int err;
int i;
- err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (err < 0)
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
+ act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
+ extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
}
act->order = i;
+ sz += tcf_action_fill_size(act);
if (ovr)
act->tcfa_refcnt++;
list_add_tail(&act->list, actions);
}
+ *attr_size = tcf_action_full_attrs_size(sz);
+
/* Remove the temp refcnt which was necessary to protect against
* destroying an existing action which was being replaced
*/
@@ -822,7 +873,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
+ if (!nest)
goto out_nlmsg_trim;
if (tcf_action_dump(skb, actions, bind, ref) < 0)
@@ -840,7 +891,8 @@ out_nlmsg_trim:
static int
tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
- struct list_head *actions, int event)
+ struct list_head *actions, int event,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
@@ -849,6 +901,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
0, 0) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
}
@@ -857,7 +910,8 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
}
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid)
+ struct nlmsghdr *n, u32 portid,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX + 1];
const struct tc_action_ops *ops;
@@ -865,22 +919,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
int index;
int err;
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
if (tb[TCA_ACT_INDEX] == NULL ||
- nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
+ nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
+ NL_SET_ERR_MSG(extack, "Invalid TC action index value");
goto err_out;
+ }
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -EINVAL;
ops = tc_lookup_action(tb[TCA_ACT_KIND]);
- if (!ops) /* could happen in batch of actions */
+ if (!ops) { /* could happen in batch of actions */
+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
goto err_out;
+ }
err = -ENOENT;
- if (ops->lookup(net, &a, index) == 0)
+ if (ops->lookup(net, &a, index, extack) == 0)
goto err_mod;
module_put(ops->owner);
@@ -893,7 +951,8 @@ err_out:
}
static int tca_action_flush(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid)
+ struct nlmsghdr *n, u32 portid,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
unsigned char *b;
@@ -907,39 +966,45 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
int err = -ENOMEM;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb) {
- pr_debug("tca_action_flush: failed skb alloc\n");
+ if (!skb)
return err;
- }
b = skb_tail_pointer(skb);
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
ops = tc_lookup_action(kind);
- if (!ops) /*some idjot trying to flush unknown action */
+ if (!ops) { /*some idjot trying to flush unknown action */
+ NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
goto err_out;
+ }
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
sizeof(*t), 0);
- if (!nlh)
+ if (!nlh) {
+ NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
goto out_module_put;
+ }
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
+ if (!nest) {
+ NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
goto out_module_put;
+ }
- err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
- if (err <= 0)
+ err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
+ if (err <= 0) {
+ nla_nest_cancel(skb, nest);
goto out_module_put;
+ }
nla_nest_end(skb, nest);
@@ -950,6 +1015,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
return 0;
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
return err;
@@ -962,17 +1029,19 @@ err_out:
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
int ret;
struct sk_buff *skb;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+ GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 1) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
return -EINVAL;
}
@@ -980,6 +1049,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
/* now do the delete */
ret = tcf_action_destroy(actions, 0);
if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "Failed to delete TC action");
kfree_skb(skb);
return ret;
}
@@ -993,38 +1063,43 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
- u32 portid, int event)
+ u32 portid, int event, struct netlink_ext_ack *extack)
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
+ size_t attr_size = 0;
LIST_HEAD(actions);
- ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+ ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (ret < 0)
return ret;
if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
- if (tb[1] != NULL)
- return tca_action_flush(net, tb[1], n, portid);
- else
- return -EINVAL;
+ if (tb[1])
+ return tca_action_flush(net, tb[1], n, portid, extack);
+
+ NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
+ return -EINVAL;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_get_1(net, tb[i], n, portid);
+ act = tcf_action_get_1(net, tb[i], n, portid, extack);
if (IS_ERR(act)) {
ret = PTR_ERR(act);
goto err;
}
act->order = i;
+ attr_size += tcf_action_fill_size(act);
list_add_tail(&act->list, &actions);
}
+ attr_size = tcf_action_full_attrs_size(attr_size);
+
if (event == RTM_GETACTION)
- ret = tcf_get_notify(net, portid, n, &actions, event);
+ ret = tcf_get_notify(net, portid, n, &actions, event, extack);
else { /* delete */
- ret = tcf_del_notify(net, n, &actions, portid);
+ ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
if (ret)
goto err;
return ret;
@@ -1037,17 +1112,19 @@ err:
static int
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
int err = 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+ GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
}
@@ -1060,16 +1137,19 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid, int ovr)
+ struct nlmsghdr *n, u32 portid, int ovr,
+ struct netlink_ext_ack *extack)
{
+ size_t attr_size = 0;
int ret = 0;
LIST_HEAD(actions);
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+ &attr_size, extack);
if (ret)
return ret;
- return tcf_add_notify(net, n, &actions, portid);
+ return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
}
static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
@@ -1097,7 +1177,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
return ret;
if (tca[TCA_ACT_TAB] == NULL) {
- pr_notice("tc_ctl_action: received NO action attribs\n");
+ NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
return -EINVAL;
}
@@ -1113,17 +1193,18 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
replay:
- ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
+ ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+ extack);
if (ret == -EAGAIN)
goto replay;
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- portid, RTM_DELACTION);
+ portid, RTM_DELACTION, extack);
break;
case RTM_GETACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- portid, RTM_GETACTION);
+ portid, RTM_GETACTION, extack);
break;
default:
BUG();
@@ -1218,7 +1299,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto out_module_put;
- ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
+ ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
if (ret < 0)
goto out_module_put;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b3f2c15affa7..18089c02e557 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -248,10 +248,14 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
{
- if (cfg->is_ebpf)
- bpf_prog_put(cfg->filter);
- else
- bpf_prog_destroy(cfg->filter);
+ struct bpf_prog *filter = cfg->filter;
+
+ if (filter) {
+ if (cfg->is_ebpf)
+ bpf_prog_put(filter);
+ else
+ bpf_prog_destroy(filter);
+ }
kfree(cfg->bpf_ops);
kfree(cfg->bpf_name);
@@ -272,7 +276,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind)
+ int replace, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -352,7 +356,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return res;
out:
if (res == ACT_P_CREATED)
- tcf_idr_cleanup(*act, est);
+ tcf_idr_release(*act, bind);
return ret;
}
@@ -367,14 +371,16 @@ static void tcf_bpf_cleanup(struct tc_action *act)
static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b15ba84e0c8..e4b880fa51fe 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,7 +96,8 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
@@ -176,14 +177,16 @@ nla_put_failure:
static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b7ba9b06b147..7e28b2ce1437 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_old, *params_new;
@@ -350,7 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
{
struct sctphdr *sctph;
- if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)
+ if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
return 1;
sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph));
@@ -626,19 +626,22 @@ static void tcf_csum_cleanup(struct tc_action *a)
struct tcf_csum_params *params;
params = rcu_dereference_protected(p->params, 1);
- kfree_rcu(params, rcu);
+ if (params)
+ kfree_rcu(params, rcu);
}
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b56986d41c87..4dc4f153cad8 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -56,7 +56,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -201,20 +201,35 @@ nla_put_failure:
static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
return tcf_idr_search(tn, a, index);
}
+static size_t tcf_gact_get_fill_size(const struct tc_action *act)
+{
+ size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */
+
+#ifdef CONFIG_GACT_PROB
+ if (to_gact(act)->tcfg_ptype)
+ /* TCA_GACT_PROB */
+ sz += nla_total_size(sizeof(struct tc_gact_p));
+#endif
+
+ return sz;
+}
+
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
.type = TCA_ACT_GACT,
@@ -225,6 +240,7 @@ static struct tc_action_ops act_gact_ops = {
.init = tcf_gact_init,
.walk = tcf_gact_walker,
.lookup = tcf_gact_search,
+ .get_fill_size = tcf_gact_get_fill_size,
.size = sizeof(struct tcf_gact),
};
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 5954e992685a..a5994cf0512b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -447,7 +447,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -824,14 +824,16 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 06e380ae0928..14c312d7908f 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -80,9 +80,12 @@ static void ipt_destroy_target(struct xt_entry_target *t)
static void tcf_ipt_release(struct tc_action *a)
{
struct tcf_ipt *ipt = to_ipt(a);
- ipt_destroy_target(ipt->tcfi_t);
+
+ if (ipt->tcfi_t) {
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_t);
+ }
kfree(ipt->tcfi_tname);
- kfree(ipt->tcfi_t);
}
static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
@@ -187,13 +190,13 @@ err2:
kfree(tname);
err1:
if (ret == ACT_P_CREATED)
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return err;
}
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
bind);
@@ -201,7 +204,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
bind);
@@ -303,14 +306,16 @@ nla_put_failure:
static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
@@ -351,14 +356,16 @@ static struct pernet_operations ipt_net_ops = {
static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e6ff88f72900..fd34015331ab 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -69,7 +69,7 @@ static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -80,13 +80,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
bool exists = false;
int ret;
- if (nla == NULL)
+ if (!nla) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
+ }
+ ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack);
if (ret < 0)
return ret;
- if (tb[TCA_MIRRED_PARMS] == NULL)
+ if (!tb[TCA_MIRRED_PARMS]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters");
return -EINVAL;
+ }
parm = nla_data(tb[TCA_MIRRED_PARMS]);
exists = tcf_idr_check(tn, parm->index, a, bind);
@@ -102,6 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
default:
if (exists)
tcf_idr_release(*a, bind);
+ NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
return -EINVAL;
}
if (parm->ifindex) {
@@ -117,8 +122,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- if (dev == NULL)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
return -EINVAL;
+ }
ret = tcf_idr_create(tn, parm->index, est, a,
&act_mirred_ops, bind, true);
if (ret)
@@ -265,14 +272,16 @@ nla_put_failure:
static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 98c6a4b2f523..4b5848b6c252 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -37,7 +37,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
};
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action **a, int ovr, int bind)
+ struct tc_action **a, int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -277,14 +278,16 @@ nla_put_failure:
static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 349beaffb29e..8a925c72db5f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -132,7 +132,7 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -176,7 +176,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p = to_pedit(*a);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
kfree(keys_ex);
return -ENOMEM;
}
@@ -419,14 +419,16 @@ nla_put_failure:
static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 95d3c9097b25..4e72bc2a0dfb 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -58,11 +58,12 @@ static struct tc_action_ops act_police_ops;
static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -74,7 +75,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_act_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
int ret = 0, err;
struct nlattr *tb[TCA_POLICE_MAX + 1];
@@ -194,7 +196,7 @@ failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return err;
}
@@ -304,7 +306,8 @@ nla_put_failure:
return -1;
}
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1ba0df238756..5db358497c9e 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -103,7 +103,8 @@ static void tcf_sample_cleanup(struct tc_action *a)
psample_group = rtnl_dereference(s->psample_group);
RCU_INIT_POINTER(s->psample_group, NULL);
- psample_group_put(psample_group);
+ if (psample_group)
+ psample_group_put(psample_group);
}
static bool tcf_sample_dev_ok_push(struct net_device *dev)
@@ -202,14 +203,16 @@ nla_put_failure:
static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 425eac11f6da..9618b4a83cee 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -79,7 +79,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -121,7 +121,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
d = to_defact(*a);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- tcf_idr_cleanup(*a, est);
+ tcf_idr_release(*a, bind);
return ret;
}
d->tcf_action = parm->action;
@@ -170,14 +170,16 @@ nla_put_failure:
static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 5a3f691bb545..ddf69fc01bdf 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -66,7 +66,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
@@ -208,14 +208,16 @@ nla_put_failure:
static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index fa975262dbac..bbcbdce732cc 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -84,7 +84,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -152,7 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
ASSERT_RTNL();
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
- if (ovr)
+ if (ret == ACT_P_CREATED)
tcf_idr_release(*a, bind);
return -ENOMEM;
}
@@ -190,7 +190,8 @@ static void tcf_skbmod_cleanup(struct tc_action *a)
struct tcf_skbmod_params *p;
p = rcu_dereference_protected(d->skbmod_p, 1);
- kfree_rcu(p, rcu);
+ if (p)
+ kfree_rcu(p, rcu);
}
static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
@@ -232,14 +233,16 @@ nla_put_failure:
static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 0e23aac09ad6..626dac81a48a 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -70,7 +70,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -153,6 +153,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
break;
default:
+ ret = -EINVAL;
goto err_out;
}
@@ -207,11 +208,12 @@ static void tunnel_key_release(struct tc_action *a)
struct tcf_tunnel_key_params *params;
params = rcu_dereference_protected(t->params, 1);
+ if (params) {
+ if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+ dst_release(&params->tcft_enc_metadata->dst);
- if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
- dst_release(&params->tcft_enc_metadata->dst);
-
- kfree_rcu(params, rcu);
+ kfree_rcu(params, rcu);
+ }
}
static int tunnel_key_dump_addresses(struct sk_buff *skb,
@@ -291,14 +293,16 @@ nla_put_failure:
static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index e1a1b3f3983a..853604685965 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -109,7 +109,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -117,7 +117,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct tc_vlan *parm;
struct tcf_vlan *v;
int action;
- __be16 push_vid = 0;
+ u16 push_vid = 0;
__be16 push_proto = 0;
u8 push_prio = 0;
bool exists = false;
@@ -195,7 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
ASSERT_RTNL();
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
- if (ovr)
+ if (ret == ACT_P_CREATED)
tcf_idr_release(*a, bind);
return -ENOMEM;
}
@@ -225,7 +225,8 @@ static void tcf_vlan_cleanup(struct tc_action *a)
struct tcf_vlan_params *p;
p = rcu_dereference_protected(v->vlan_p, 1);
- kfree_rcu(p, rcu);
+ if (p)
+ kfree_rcu(p, rcu);
}
static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
@@ -267,14 +268,16 @@ nla_put_failure:
static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 247b7cc20c13..b66754f52a9f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1433,11 +1433,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
#ifdef CONFIG_NET_CLS_ACT
{
struct tc_action *act;
+ size_t attr_size = 0;
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND);
+ TCA_ACT_BIND, extack);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -1450,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- &actions);
+ &actions, &attr_size, extack);
if (err)
return err;
list_for_each_entry(act, &actions, list)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7d0ce2c40f93..d964e60c730e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb,
fl_set_key_flag(key, mask, flags_key, flags_mask,
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_FIRST_FRAG);
return 0;
}
@@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
fl_get_key_flag(flags_key, flags_mask, &key, &mask,
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_FIRST_FRAG);
_key = cpu_to_be32(key);
_mask = cpu_to_be32(mask);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ed8b6a24b9e9..bac47b5d18fd 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -489,6 +489,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
RCU_INIT_POINTER(*kp, key->next);
tcf_unbind_filter(tp, &key->res);
+ idr_remove(&ht->handle_idr, key->handle);
tcf_exts_get_net(&key->exts);
call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
return 0;
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
new file mode 100644
index 000000000000..a5f34e930eff
--- /dev/null
+++ b/net/sched/em_ipt.c
@@ -0,0 +1,257 @@
+/*
+ * net/sched/em_ipt.c IPtables matches Ematch
+ *
+ * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_ipt.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/pkt_cls.h>
+
+struct em_ipt_match {
+ const struct xt_match *match;
+ u32 hook;
+ u8 match_data[0] __aligned(8);
+};
+
+struct em_ipt_xt_match {
+ char *match_name;
+ int (*validate_match_data)(struct nlattr **tb, u8 mrev);
+};
+
+static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
+ [TCA_EM_IPT_MATCH_NAME] = { .type = NLA_STRING,
+ .len = XT_EXTENSION_MAXNAMELEN },
+ [TCA_EM_IPT_MATCH_REVISION] = { .type = NLA_U8 },
+ [TCA_EM_IPT_HOOK] = { .type = NLA_U32 },
+ [TCA_EM_IPT_NFPROTO] = { .type = NLA_U8 },
+ [TCA_EM_IPT_MATCH_DATA] = { .type = NLA_UNSPEC },
+};
+
+static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
+{
+ struct xt_mtchk_param mtpar = {};
+ union {
+ struct ipt_entry e4;
+ struct ip6t_entry e6;
+ } e = {};
+
+ mtpar.net = net;
+ mtpar.table = "filter";
+ mtpar.hook_mask = 1 << im->hook;
+ mtpar.family = im->match->family;
+ mtpar.match = im->match;
+ mtpar.entryinfo = &e;
+ mtpar.matchinfo = (void *)im->match_data;
+ return xt_check_match(&mtpar, mdata_len, 0, 0);
+}
+
+static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+ if (mrev != 0) {
+ pr_err("only policy match revision 0 supported");
+ return -EINVAL;
+ }
+
+ if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) {
+ pr_err("policy can only be matched on NF_INET_PRE_ROUTING");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
+ {
+ .match_name = "policy",
+ .validate_match_data = policy_validate_match_data
+ },
+ {}
+};
+
+static struct xt_match *get_xt_match(struct nlattr **tb)
+{
+ const struct em_ipt_xt_match *m;
+ struct nlattr *mname_attr;
+ u8 nfproto, mrev = 0;
+ int ret;
+
+ mname_attr = tb[TCA_EM_IPT_MATCH_NAME];
+ for (m = em_ipt_xt_matches; m->match_name; m++) {
+ if (!nla_strcmp(mname_attr, m->match_name))
+ break;
+ }
+
+ if (!m->match_name) {
+ pr_err("Unsupported xt match");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (tb[TCA_EM_IPT_MATCH_REVISION])
+ mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);
+
+ ret = m->validate_match_data(tb, mrev);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+ return xt_request_find_match(nfproto, m->match_name, mrev);
+}
+
+static int em_ipt_change(struct net *net, void *data, int data_len,
+ struct tcf_ematch *em)
+{
+ struct nlattr *tb[TCA_EM_IPT_MAX + 1];
+ struct em_ipt_match *im = NULL;
+ struct xt_match *match;
+ int mdata_len, ret;
+
+ ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
+ NULL);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
+ !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
+ return -EINVAL;
+
+ match = get_xt_match(tb);
+ if (IS_ERR(match)) {
+ pr_err("unable to load match\n");
+ return PTR_ERR(match);
+ }
+
+ mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
+ im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
+ if (!im) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ im->match = match;
+ im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+ nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
+
+ ret = check_match(net, im, mdata_len);
+ if (ret)
+ goto err;
+
+ em->datalen = sizeof(*im) + mdata_len;
+ em->data = (unsigned long)im;
+ return 0;
+
+err:
+ kfree(im);
+ module_put(match->me);
+ return ret;
+}
+
+static void em_ipt_destroy(struct tcf_ematch *em)
+{
+ struct em_ipt_match *im = (void *)em->data;
+
+ if (!im)
+ return;
+
+ if (im->match->destroy) {
+ struct xt_mtdtor_param par = {
+ .net = em->net,
+ .match = im->match,
+ .matchinfo = im->match_data,
+ .family = im->match->family
+ };
+ im->match->destroy(&par);
+ }
+ module_put(im->match->me);
+ kfree((void *)im);
+}
+
+static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ const struct em_ipt_match *im = (const void *)em->data;
+ struct xt_action_param acpar = {};
+ struct net_device *indev = NULL;
+ struct nf_hook_state state;
+ int ret;
+
+ rcu_read_lock();
+
+ if (skb->skb_iif)
+ indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+
+ nf_hook_state_init(&state, im->hook, im->match->family,
+ indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
+
+ acpar.match = im->match;
+ acpar.matchinfo = im->match_data;
+ acpar.state = &state;
+
+ ret = im->match->match(skb, &acpar);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+ struct em_ipt_match *im = (void *)em->data;
+
+ if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+ return -EMSGSIZE;
+ if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
+ im->match->usersize ?: im->match->matchsize,
+ im->match_data) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static struct tcf_ematch_ops em_ipt_ops = {
+ .kind = TCF_EM_IPT,
+ .change = em_ipt_change,
+ .destroy = em_ipt_destroy,
+ .match = em_ipt_match,
+ .dump = em_ipt_dump,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_ipt_ops.link)
+};
+
+static int __init init_em_ipt(void)
+{
+ return tcf_em_register(&em_ipt_ops);
+}
+
+static void __exit exit_em_ipt(void)
+{
+ tcf_em_unregister(&em_ipt_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>");
+MODULE_DESCRIPTION("TC extended match for IPtables matches");
+
+module_init(init_em_ipt);
+module_exit(exit_em_ipt);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d512f49ee83c..106dae7e4818 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
unsigned int len)
{
+ bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
* If child was empty even before update then backlog
* counter is screwed and we skip notification because
* parent class is already passive.
+ *
+ * If the original child was offloaded then it is allowed
+ * to be seem as empty, so the parent is notified anyway.
*/
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+ !qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 190570f21b20..39c144b6ff98 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -106,6 +106,14 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
__skb_queue_tail(&q->skb_bad_txq, skb);
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_inc(q, skb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ } else {
+ qdisc_qstats_backlog_inc(q, skb);
+ q->q.qlen++;
+ }
+
if (lock)
spin_unlock(lock);
}
@@ -196,14 +204,6 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
break;
if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
qdisc_enqueue_skb_bad_txq(q, nskb);
-
- if (qdisc_is_percpu_stats(q)) {
- qdisc_qstats_cpu_backlog_inc(q, nskb);
- qdisc_qstats_cpu_qlen_inc(q);
- } else {
- qdisc_qstats_backlog_inc(q, nskb);
- q->q.qlen++;
- }
break;
}
skb->next = nskb;
@@ -373,24 +373,33 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
*/
static inline bool qdisc_restart(struct Qdisc *q, int *packets)
{
+ bool more, validate, nolock = q->flags & TCQ_F_NOLOCK;
spinlock_t *root_lock = NULL;
struct netdev_queue *txq;
struct net_device *dev;
struct sk_buff *skb;
- bool validate;
/* Dequeue packet */
+ if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
+ return false;
+
skb = dequeue_skb(q, &validate, packets);
- if (unlikely(!skb))
+ if (unlikely(!skb)) {
+ if (nolock)
+ clear_bit(__QDISC_STATE_RUNNING, &q->state);
return false;
+ }
- if (!(q->flags & TCQ_F_NOLOCK))
+ if (!nolock)
root_lock = qdisc_lock(q);
dev = qdisc_dev(q);
txq = skb_get_tx_queue(dev, skb);
- return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+ more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+ if (nolock)
+ clear_bit(__QDISC_STATE_RUNNING, &q->state);
+ return more;
}
void __qdisc_run(struct Qdisc *q)
@@ -628,6 +637,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
int band = prio2band[skb->priority & TC_PRIO_MAX];
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct skb_array *q = band2list(priv, band);
+ unsigned int pkt_len = qdisc_pkt_len(skb);
int err;
err = skb_array_produce(q, skb);
@@ -636,7 +646,10 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
return qdisc_drop_cpu(skb, qdisc, to_free);
qdisc_qstats_cpu_qlen_inc(qdisc);
- qdisc_qstats_cpu_backlog_inc(qdisc, skb);
+ /* Note: skb can not be used after skb_array_produce(),
+ * so we better not use qdisc_qstats_cpu_backlog_inc()
+ */
+ this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 1ea9846cc6ce..2a4ab7caf553 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1337,6 +1337,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
struct nlattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_opt *hopt;
u64 rate64, ceil64;
+ int warn = 0;
/* extract all subattrs from opt attr */
if (!opt)
@@ -1499,13 +1500,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->quantum = min_t(u64, quantum, INT_MAX);
if (!hopt->quantum && cl->quantum < 1000) {
- pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n",
- cl->common.classid);
+ warn = -1;
cl->quantum = 1000;
}
if (!hopt->quantum && cl->quantum > 200000) {
- pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n",
- cl->common.classid);
+ warn = 1;
cl->quantum = 200000;
}
if (hopt->quantum)
@@ -1519,6 +1518,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
sch_tree_unlock(sch);
+ if (warn)
+ pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
+ cl->common.classid, (warn == -1 ? "small" : "big"));
+
qdisc_class_hash_grow(sch, &q->clhash);
*arg = (unsigned long)cl;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7c179addebcd..7d6801fc5340 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -509,7 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
if (unlikely(sch->q.qlen >= sch->limit))
- return qdisc_drop(skb, sch, to_free);
+ return qdisc_drop_all(skb, sch, to_free);
qdisc_qstats_backlog_inc(sch, skb);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index efbf51f35778..222e53d3d27a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
sch->q.qlen = 0;
}
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
{
- struct prio_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload opt = {
.handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
- if (enable) {
+ if (qopt) {
opt.command = TC_PRIO_REPLACE;
- opt.replace_params.bands = q->bands;
- memcpy(&opt.replace_params.priomap, q->prio2band,
+ opt.replace_params.bands = qopt->bands;
+ memcpy(&opt.replace_params.priomap, qopt->priomap,
TC_PRIO_MAX + 1);
opt.replace_params.qstats = &sch->qstats;
} else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- prio_offload(sch, false);
+ prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
}
+ prio_offload(sch, qopt);
sch_tree_lock(sch);
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_unlock(sch);
- prio_offload(sch, true);
return 0;
}
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
+ struct tc_prio_qopt_offload graft_offload;
+ struct net_device *dev = qdisc_dev(sch);
unsigned long band = arg - 1;
+ bool any_qdisc_is_offloaded;
+ int err;
if (new == NULL)
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->queues[band]);
+
+ if (!tc_can_offload(dev))
+ return 0;
+
+ graft_offload.handle = sch->handle;
+ graft_offload.parent = sch->parent;
+ graft_offload.graft_params.band = band;
+ graft_offload.graft_params.child_handle = new->handle;
+ graft_offload.command = TC_PRIO_GRAFT;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+ &graft_offload);
+
+ /* Don't report error if the graft is part of destroy operation. */
+ if (err && new != &noop_qdisc) {
+ /* Don't report error if the parent, the old child and the new
+ * one are not offloaded.
+ */
+ any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+ any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+ if (*old)
+ any_qdisc_is_offloaded |= (*old)->flags &
+ TCQ_F_OFFLOADED;
+
+ if (any_qdisc_is_offloaded)
+ NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+ }
+
return 0;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 229172d509cc..03225a8df973 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -188,7 +188,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
int ret;
if (qdisc_pkt_len(skb) > q->max_size) {
- if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
+ if (skb_is_gso(skb) &&
+ skb_gso_validate_mac_len(skb, q->max_size))
return tbf_segment(skb, sch, to_free);
return qdisc_drop(skb, sch, to_free);
}
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6776582ec449..e845e4588535 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -15,6 +15,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
offload.o stream_sched.o stream_sched_prio.o \
stream_sched_rr.o stream_interleave.o
+sctp_diag-y := diag.o
+
sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
sctp-$(CONFIG_PROC_FS) += proc.o
sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 00667c50efa7..e64630cd3331 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp)
return NULL;
INIT_LIST_HEAD(&new->key_list);
+ refcount_set(&new->refcnt, 1);
new->key_id = key_id;
return new;
}
/* Free the shared key structure */
-static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
+static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key)
{
BUG_ON(!list_empty(&sh_key->key_list));
sctp_auth_key_put(sh_key->key);
@@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
kfree(sh_key);
}
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key)
+{
+ if (refcount_dec_and_test(&sh_key->refcnt))
+ sctp_auth_shkey_destroy(sh_key);
+}
+
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key)
+{
+ refcount_inc(&sh_key->refcnt);
+}
+
/* Destroy the entire key list. This is done during the
* associon and endpoint free process.
*/
@@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys)
key_for_each_safe(ep_key, tmp, keys) {
list_del_init(&ep_key->key_list);
- sctp_auth_shkey_free(ep_key);
+ sctp_auth_shkey_release(ep_key);
}
}
@@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
sctp_auth_key_put(asoc->asoc_shared_key);
asoc->asoc_shared_key = secret;
+ asoc->shkey = ep_key;
/* Update send queue in case any chunk already in there now
* needs authenticating
*/
list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) {
- if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc))
+ if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) {
chunk->auth = 1;
+ if (!chunk->shkey) {
+ chunk->shkey = asoc->shkey;
+ sctp_auth_shkey_hold(chunk->shkey);
+ }
+ }
}
return 0;
@@ -431,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey(
/* First search associations set of endpoint pair shared keys */
key_for_each(key, &asoc->endpoint_shared_keys) {
- if (key->key_id == key_id)
- return key;
+ if (key->key_id == key_id) {
+ if (!key->deactivated)
+ return key;
+ break;
+ }
}
return NULL;
@@ -703,16 +724,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
* after the AUTH chunk in the SCTP packet.
*/
void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
- struct sk_buff *skb,
- struct sctp_auth_chunk *auth,
- gfp_t gfp)
+ struct sk_buff *skb, struct sctp_auth_chunk *auth,
+ struct sctp_shared_key *ep_key, gfp_t gfp)
{
- struct crypto_shash *tfm;
struct sctp_auth_bytes *asoc_key;
+ struct crypto_shash *tfm;
__u16 key_id, hmac_id;
- __u8 *digest;
unsigned char *end;
int free_key = 0;
+ __u8 *digest;
/* Extract the info we need:
* - hmac id
@@ -724,12 +744,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
if (key_id == asoc->active_key_id)
asoc_key = asoc->asoc_shared_key;
else {
- struct sctp_shared_key *ep_key;
-
- ep_key = sctp_auth_get_shkey(asoc, key_id);
- if (!ep_key)
- return;
-
+ /* ep_key can't be NULL here */
asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp);
if (!asoc_key)
return;
@@ -829,7 +844,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
struct sctp_association *asoc,
struct sctp_authkey *auth_key)
{
- struct sctp_shared_key *cur_key = NULL;
+ struct sctp_shared_key *cur_key, *shkey;
struct sctp_auth_bytes *key;
struct list_head *sh_keys;
int replace = 0;
@@ -842,46 +857,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
else
sh_keys = &ep->endpoint_shared_keys;
- key_for_each(cur_key, sh_keys) {
- if (cur_key->key_id == auth_key->sca_keynumber) {
+ key_for_each(shkey, sh_keys) {
+ if (shkey->key_id == auth_key->sca_keynumber) {
replace = 1;
break;
}
}
- /* If we are not replacing a key id, we need to allocate
- * a shared key.
- */
- if (!replace) {
- cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber,
- GFP_KERNEL);
- if (!cur_key)
- return -ENOMEM;
- }
+ cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL);
+ if (!cur_key)
+ return -ENOMEM;
/* Create a new key data based on the info passed in */
key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
- if (!key)
- goto nomem;
+ if (!key) {
+ kfree(cur_key);
+ return -ENOMEM;
+ }
memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
+ cur_key->key = key;
- /* If we are replacing, remove the old keys data from the
- * key id. If we are adding new key id, add it to the
- * list.
- */
- if (replace)
- sctp_auth_key_put(cur_key->key);
- else
- list_add(&cur_key->key_list, sh_keys);
+ if (replace) {
+ list_del_init(&shkey->key_list);
+ sctp_auth_shkey_release(shkey);
+ }
+ list_add(&cur_key->key_list, sh_keys);
- cur_key->key = key;
return 0;
-nomem:
- if (!replace)
- sctp_auth_shkey_free(cur_key);
-
- return -ENOMEM;
}
int sctp_auth_set_active_key(struct sctp_endpoint *ep,
@@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
}
}
- if (!found)
+ if (!found || key->deactivated)
return -EINVAL;
if (asoc) {
@@ -952,7 +955,58 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
/* Delete the shared key */
list_del_init(&key->key_list);
- sctp_auth_shkey_free(key);
+ sctp_auth_shkey_release(key);
+
+ return 0;
+}
+
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+ struct sctp_association *asoc, __u16 key_id)
+{
+ struct sctp_shared_key *key;
+ struct list_head *sh_keys;
+ int found = 0;
+
+ /* The key identifier MUST NOT be the current active key
+ * The key identifier MUST correst to an existing key
+ */
+ if (asoc) {
+ if (asoc->active_key_id == key_id)
+ return -EINVAL;
+
+ sh_keys = &asoc->endpoint_shared_keys;
+ } else {
+ if (ep->active_key_id == key_id)
+ return -EINVAL;
+
+ sh_keys = &ep->endpoint_shared_keys;
+ }
+
+ key_for_each(key, sh_keys) {
+ if (key->key_id == key_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found)
+ return -EINVAL;
+
+ /* refcnt == 1 and !list_empty mean it's not being used anywhere
+ * and deactivated will be set, so it's time to notify userland
+ * that this shkey can be freed.
+ */
+ if (asoc && !list_empty(&key->key_list) &&
+ refcount_read(&key->refcnt) == 1) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, key->key_id,
+ SCTP_AUTH_FREE_KEY, GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+
+ key->deactivated = 1;
return 0;
}
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 991a530c6b31..be296d633e95 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -168,9 +168,12 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
{
size_t len, first_len, max_data, remaining;
size_t msg_len = iov_iter_count(from);
+ struct sctp_shared_key *shkey = NULL;
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
struct sctp_datamsg *msg;
+ struct sctp_sock *sp;
+ struct sctp_af *af;
int err;
msg = sctp_datamsg_new(GFP_KERNEL);
@@ -189,9 +192,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
/* This is the biggest possible DATA chunk that can fit into
* the packet
*/
- max_data = asoc->pathmtu -
- sctp_sk(asoc->base.sk)->pf->af->net_header_len -
- sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream);
+ sp = sctp_sk(asoc->base.sk);
+ af = sp->pf->af;
+ max_data = asoc->pathmtu - af->net_header_len -
+ sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream) -
+ af->ip_options_len(asoc->base.sk);
max_data = SCTP_TRUNC4(max_data);
/* If the the peer requested that we authenticate DATA chunks
@@ -204,6 +209,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (hmac_desc)
max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
hmac_desc->hmac_len);
+
+ if (sinfo->sinfo_tsn &&
+ sinfo->sinfo_ssn != asoc->active_key_id) {
+ shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn);
+ if (!shkey) {
+ err = -EINVAL;
+ goto errout;
+ }
+ } else {
+ shkey = asoc->shkey;
+ }
}
/* Check what's our max considering the above */
@@ -275,6 +291,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (err < 0)
goto errout_chunk_free;
+ chunk->shkey = shkey;
+
/* Put the chunk->skb back into the form expected by send. */
__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr -
chunk->skb->data);
diff --git a/net/sctp/sctp_diag.c b/net/sctp/diag.c
index a72a7d925d46..078f01a8d582 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/diag.c
@@ -1,3 +1,34 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions implement sctp diag support.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
#include <linux/module.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8b3146816519..e2f5a3ee41a7 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -349,8 +349,8 @@ out:
/* Look for any peeled off association from the endpoint that matches the
* given peer address.
*/
-int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
- const union sctp_addr *paddr)
+bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
+ const union sctp_addr *paddr)
{
struct sctp_sockaddr_entry *addr;
struct sctp_bind_addr *bp;
@@ -362,10 +362,10 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
*/
list_for_each_entry(addr, &bp->address_list, list) {
if (sctp_has_association(net, &addr->a, paddr))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/* Do delayed input processing. This is scheduled by sctp_rcv().
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 0247cc432e02..ba8a6e6c36fa 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -106,6 +106,7 @@ int sctp_rcv(struct sk_buff *skb)
int family;
struct sctp_af *af;
struct net *net = dev_net(skb->dev);
+ bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb);
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
@@ -123,8 +124,7 @@ int sctp_rcv(struct sk_buff *skb)
* it's better to just linearize it otherwise crc computing
* takes longer.
*/
- if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
- skb_linearize(skb)) ||
+ if ((!is_gso && skb_linearize(skb)) ||
!pskb_may_pull(skb, sizeof(struct sctphdr)))
goto discard_it;
@@ -135,7 +135,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
else if (!sctp_checksum_disable &&
- !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+ !is_gso &&
sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;
@@ -1010,19 +1010,18 @@ struct sctp_association *sctp_lookup_association(struct net *net,
}
/* Is there an association matching the given local and peer addresses? */
-int sctp_has_association(struct net *net,
- const union sctp_addr *laddr,
- const union sctp_addr *paddr)
+bool sctp_has_association(struct net *net,
+ const union sctp_addr *laddr,
+ const union sctp_addr *paddr)
{
- struct sctp_association *asoc;
struct sctp_transport *transport;
- if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
+ if (sctp_lookup_association(net, laddr, paddr, &transport)) {
sctp_transport_put(transport);
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/*
@@ -1218,7 +1217,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
* issue as packets hitting this are mostly INIT or INIT-ACK and
* those cannot be on GSO-style anyway.
*/
- if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
+ if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
return NULL;
ch = (struct sctp_chunkhdr *)skb->data;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 48392552ee7c..23ebc5318edc 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -170,7 +170,7 @@ next_chunk:
chunk = list_entry(entry, struct sctp_chunk, list);
- if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
+ if (skb_is_gso(chunk->skb) && skb_is_gso_sctp(chunk->skb)) {
/* GSO-marked skbs but without frags, handle
* them normally
*/
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e35d4f73d2df..31083b5035ec 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -427,6 +427,41 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
rcu_read_unlock();
}
+/* Copy over any ip options */
+static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk)
+{
+ struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ pr_err("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
+
+/* Account for the IP options */
+static int sctp_v6_ip_options_len(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ int len = 0;
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt)
+ len = opt->opt_flen + opt->opt_nflen;
+
+ rcu_read_unlock();
+ return len;
+}
+
/* Initialize a sockaddr_storage from in incoming skb. */
static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb,
int is_saddr)
@@ -666,7 +701,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
struct sock *newsk;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
- struct ipv6_txoptions *opt;
newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
if (!newsk)
@@ -689,12 +723,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
- rcu_read_lock();
- opt = rcu_dereference(np->opt);
- if (opt)
- opt = ipv6_dup_options(newsk, opt);
- RCU_INIT_POINTER(newnp->opt, opt);
- rcu_read_unlock();
+ sctp_v6_copy_ip_options(sk, newsk);
/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
* and getpeername().
@@ -728,8 +757,10 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
sctp_v6_map_v4(addr);
}
- if (addr->sa.sa_family == AF_INET)
+ if (addr->sa.sa_family == AF_INET) {
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
return sizeof(struct sockaddr_in);
+ }
return sizeof(struct sockaddr_in6);
}
@@ -952,16 +983,16 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
/* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */
static int sctp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
int rc;
- rc = inet6_getname(sock, uaddr, uaddr_len, peer);
+ rc = inet6_getname(sock, uaddr, peer);
- if (rc != 0)
+ if (rc < 0)
return rc;
- *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk),
+ rc = sctp_v6_addr_to_user(sctp_sk(sock->sk),
(union sctp_addr *)uaddr);
return rc;
@@ -1041,6 +1072,7 @@ static struct sctp_af sctp_af_inet6 = {
.ecn_capable = sctp_v6_ecn_capable,
.net_header_len = sizeof(struct ipv6hdr),
.sockaddr_len = sizeof(struct sockaddr_in6),
+ .ip_options_len = sctp_v6_ip_options_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
@@ -1059,6 +1091,7 @@ static struct sctp_pf sctp_pf_inet6 = {
.addr_to_user = sctp_v6_addr_to_user,
.to_sk_saddr = sctp_v6_to_sk_saddr,
.to_sk_daddr = sctp_v6_to_sk_daddr,
+ .copy_ip_options = sctp_v6_copy_ip_options,
.af = &sctp_af_inet6,
};
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index aeea6da81441..fd2684ad94c8 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net)
if (!ent)
pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
}
-
-/* Cleanup the objcount entry in the proc filesystem. */
-void sctp_dbg_objcnt_exit(struct net *net)
-{
- remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
-}
-
-
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 35bc7106d182..123e9f2dc226 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,7 +45,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
- if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+ if (!skb_is_gso_sctp(skb))
goto out;
sh = sctp_hdr(skb);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 01a26ee051e3..690d8557bb7b 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -69,7 +69,11 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
static void sctp_packet_reset(struct sctp_packet *packet)
{
+ /* sctp_packet_transmit() relies on this to reset size to the
+ * current overhead after sending packets.
+ */
packet->size = packet->overhead;
+
packet->has_cookie_echo = 0;
packet->has_sack = 0;
packet->has_data = 0;
@@ -87,6 +91,7 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
struct sock *sk;
+ size_t overhead = sizeof(struct ipv6hdr) + sizeof(struct sctphdr);
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
packet->vtag = vtag;
@@ -95,10 +100,22 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
if (!sctp_packet_empty(packet))
return;
- /* set packet max_size with pathmtu */
+ /* set packet max_size with pathmtu, then calculate overhead */
packet->max_size = tp->pathmtu;
- if (!asoc)
+ if (asoc) {
+ struct sctp_sock *sp = sctp_sk(asoc->base.sk);
+ struct sctp_af *af = sp->pf->af;
+
+ overhead = af->net_header_len +
+ af->ip_options_len(asoc->base.sk);
+ overhead += sizeof(struct sctphdr);
+ packet->overhead = overhead;
+ packet->size = overhead;
+ } else {
+ packet->overhead = overhead;
+ packet->size = overhead;
return;
+ }
/* update dst or transport pathmtu if in need */
sk = asoc->base.sk;
@@ -140,23 +157,14 @@ void sctp_packet_init(struct sctp_packet *packet,
struct sctp_transport *transport,
__u16 sport, __u16 dport)
{
- struct sctp_association *asoc = transport->asoc;
- size_t overhead;
-
pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport);
packet->transport = transport;
packet->source_port = sport;
packet->destination_port = dport;
INIT_LIST_HEAD(&packet->chunk_list);
- if (asoc) {
- struct sctp_sock *sp = sctp_sk(asoc->base.sk);
- overhead = sp->pf->af->net_header_len;
- } else {
- overhead = sizeof(struct ipv6hdr);
- }
- overhead += sizeof(struct sctphdr);
- packet->overhead = overhead;
+ /* The overhead will be calculated by sctp_packet_config() */
+ packet->overhead = 0;
sctp_packet_reset(packet);
packet->vtag = 0;
}
@@ -241,10 +249,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
if (!chunk->auth)
return retval;
- auth = sctp_make_auth(asoc);
+ auth = sctp_make_auth(asoc, chunk->shkey->key_id);
if (!auth)
return retval;
+ auth->shkey = chunk->shkey;
+ sctp_auth_shkey_hold(auth->shkey);
+
retval = __sctp_packet_append_chunk(pkt, auth);
if (retval != SCTP_XMIT_OK)
@@ -490,7 +501,8 @@ merge:
}
if (auth) {
- sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
+ sctp_auth_calculate_hmac(tp->asoc, nskb, auth,
+ packet->auth->shkey, gfp);
/* free auth if no more chunks, or add it back */
if (list_empty(&packet->chunk_list))
sctp_chunk_free(packet->auth);
@@ -770,6 +782,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
enum sctp_xmit retval = SCTP_XMIT_OK;
size_t psize, pmtu, maxsize;
+ /* Don't bundle in this packet if this chunk's auth key doesn't
+ * match other chunks already enqueued on this packet. Also,
+ * don't bundle the chunk with auth key if other chunks in this
+ * packet don't have auth key.
+ */
+ if ((packet->auth && chunk->shkey != packet->auth->shkey) ||
+ (!packet->auth && chunk->shkey &&
+ chunk->chunk_hdr->type != SCTP_CID_AUTH))
+ return SCTP_XMIT_PMTU_FULL;
+
psize = packet->size;
if (packet->transport->asoc)
pmtu = packet->transport->asoc->pathmtu;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 537545ebcb0e..1d9ccc6dab2b 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = {
.release = single_release_net,
};
-/* Set up the proc fs entry for 'snmp' object. */
-int __net_init sctp_snmp_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_snmp_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(struct net *net)
-{
- remove_proc_entry("snmp", net->sctp.proc_net_sctp);
-}
-
/* Dump local addresses of an association/endpoint. */
static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
{
@@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = {
.release = seq_release_net,
};
-/* Set up the proc fs entry for 'eps' object. */
-int __net_init sctp_eps_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_eps_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(struct net *net)
-{
- remove_proc_entry("eps", net->sctp.proc_net_sctp);
-}
-
struct sctp_ht_iter {
struct seq_net_private p;
struct rhashtable_iter hti;
@@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = {
.release = seq_release_net,
};
-/* Set up the proc fs entry for 'assocs' object. */
-int __net_init sctp_assocs_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_assocs_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(struct net *net)
-{
- remove_proc_entry("assocs", net->sctp.proc_net_sctp);
-}
-
static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
{
struct sctp_association *assoc;
@@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = {
.show = sctp_remaddr_seq_show,
};
-/* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(struct net *net)
-{
- remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
-}
-
static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_remaddr_ops,
@@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = {
.release = seq_release_net,
};
-int __net_init sctp_remaddr_proc_init(struct net *net)
+/* Set up the proc fs entry for the SCTP protocol. */
+int __net_init sctp_proc_init(struct net *net)
{
- struct proc_dir_entry *p;
-
- p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_remaddr_seq_fops);
- if (!p)
+ net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+ if (!net->sctp.proc_net_sctp)
return -ENOMEM;
+ if (!proc_create("snmp", 0444, net->sctp.proc_net_sctp,
+ &sctp_snmp_seq_fops))
+ goto cleanup;
+ if (!proc_create("eps", 0444, net->sctp.proc_net_sctp,
+ &sctp_eps_seq_fops))
+ goto cleanup;
+ if (!proc_create("assocs", 0444, net->sctp.proc_net_sctp,
+ &sctp_assocs_seq_fops))
+ goto cleanup;
+ if (!proc_create("remaddr", 0444, net->sctp.proc_net_sctp,
+ &sctp_remaddr_seq_fops))
+ goto cleanup;
return 0;
+
+cleanup:
+ remove_proc_subtree("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+ return -ENOMEM;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 91813e686c67..d685f8456762 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -80,56 +80,6 @@ long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
-/* Set up the proc fs entry for the SCTP protocol. */
-static int __net_init sctp_proc_init(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
- if (!net->sctp.proc_net_sctp)
- goto out_proc_net_sctp;
- if (sctp_snmp_proc_init(net))
- goto out_snmp_proc_init;
- if (sctp_eps_proc_init(net))
- goto out_eps_proc_init;
- if (sctp_assocs_proc_init(net))
- goto out_assocs_proc_init;
- if (sctp_remaddr_proc_init(net))
- goto out_remaddr_proc_init;
-
- return 0;
-
-out_remaddr_proc_init:
- sctp_assocs_proc_exit(net);
-out_assocs_proc_init:
- sctp_eps_proc_exit(net);
-out_eps_proc_init:
- sctp_snmp_proc_exit(net);
-out_snmp_proc_init:
- remove_proc_entry("sctp", net->proc_net);
- net->sctp.proc_net_sctp = NULL;
-out_proc_net_sctp:
- return -ENOMEM;
-#endif /* CONFIG_PROC_FS */
- return 0;
-}
-
-/* Clean up the proc fs entry for the SCTP protocol.
- * Note: Do not make this __exit as it is used in the init error
- * path.
- */
-static void sctp_proc_exit(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- sctp_snmp_proc_exit(net);
- sctp_eps_proc_exit(net);
- sctp_assocs_proc_exit(net);
- sctp_remaddr_proc_exit(net);
-
- remove_proc_entry("sctp", net->proc_net);
- net->sctp.proc_net_sctp = NULL;
-#endif
-}
-
/* Private helper to extract ipv4 address and stash them in
* the protocol structure.
*/
@@ -237,6 +187,45 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
return error;
}
+/* Copy over any ip options */
+static void sctp_v4_copy_ip_options(struct sock *sk, struct sock *newsk)
+{
+ struct inet_sock *newinet, *inet = inet_sk(sk);
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ pr_err("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
+/* Account for the IP options */
+static int sctp_v4_ip_options_len(struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ip_options_rcu *inet_opt;
+ int len = 0;
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt)
+ len = inet_opt->opt.optlen;
+
+ rcu_read_unlock();
+ return len;
+}
+
/* Initialize a sctp_addr from in incoming skb. */
static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
int is_saddr)
@@ -588,6 +577,8 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
sctp_copy_sock(newsk, sk, asoc);
sock_reset_flag(newsk, SOCK_ZAPPED);
+ sctp_v4_copy_ip_options(sk, newsk);
+
newinet = inet_sk(newsk);
newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
@@ -1006,6 +997,7 @@ static struct sctp_pf sctp_pf_inet = {
.addr_to_user = sctp_v4_addr_to_user,
.to_sk_saddr = sctp_v4_to_sk_saddr,
.to_sk_daddr = sctp_v4_to_sk_daddr,
+ .copy_ip_options = sctp_v4_copy_ip_options,
.af = &sctp_af_inet
};
@@ -1090,6 +1082,7 @@ static struct sctp_af sctp_af_inet = {
.ecn_capable = sctp_v4_ecn_capable,
.net_header_len = sizeof(struct iphdr),
.sockaddr_len = sizeof(struct sockaddr_in),
+ .ip_options_len = sctp_v4_ip_options_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
@@ -1285,10 +1278,12 @@ static int __net_init sctp_defaults_init(struct net *net)
if (status)
goto err_init_mibs;
+#ifdef CONFIG_PROC_FS
/* Initialize proc fs directory. */
status = sctp_proc_init(net);
if (status)
goto err_init_proc;
+#endif
sctp_dbg_objcnt_init(net);
@@ -1306,8 +1301,10 @@ static int __net_init sctp_defaults_init(struct net *net)
return 0;
+#ifdef CONFIG_PROC_FS
err_init_proc:
cleanup_sctp_mibs(net);
+#endif
err_init_mibs:
sctp_sysctl_net_unregister(net);
err_sysctl_register:
@@ -1320,9 +1317,10 @@ static void __net_exit sctp_defaults_exit(struct net *net)
sctp_free_addr_wq(net);
sctp_free_local_addr_list(net);
- sctp_dbg_objcnt_exit(net);
-
- sctp_proc_exit(net);
+#ifdef CONFIG_PROC_FS
+ remove_proc_subtree("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+#endif
cleanup_sctp_mibs(net);
sctp_sysctl_net_unregister(net);
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d01475f5f710..5a4fb1dc8400 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -87,7 +87,28 @@ static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
/* Control chunk destructor */
static void sctp_control_release_owner(struct sk_buff *skb)
{
- /*TODO: do memory release */
+ struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg;
+
+ if (chunk->shkey) {
+ struct sctp_shared_key *shkey = chunk->shkey;
+ struct sctp_association *asoc = chunk->asoc;
+
+ /* refcnt == 2 and !list_empty mean after this release, it's
+ * not being used anywhere, and it's time to notify userland
+ * that this shkey can be freed if it's been deactivated.
+ */
+ if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+ refcount_read(&shkey->refcnt) == 2) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+ SCTP_AUTH_FREE_KEY,
+ GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+ sctp_auth_shkey_release(chunk->shkey);
+ }
}
static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
@@ -102,7 +123,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
*
* For now don't do anything for now.
*/
+ if (chunk->auth) {
+ chunk->shkey = asoc->shkey;
+ sctp_auth_shkey_hold(chunk->shkey);
+ }
skb->sk = asoc ? asoc->base.sk : NULL;
+ skb_shinfo(skb)->destructor_arg = chunk;
skb->destructor = sctp_control_release_owner;
}
@@ -1271,7 +1297,8 @@ nodata:
return retval;
}
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+ __u16 key_id)
{
struct sctp_authhdr auth_hdr;
struct sctp_hmac *hmac_desc;
@@ -1289,7 +1316,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
return NULL;
auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
- auth_hdr.shkey_id = htons(asoc->active_key_id);
+ auth_hdr.shkey_id = htons(key_id);
retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
&auth_hdr);
@@ -3071,6 +3098,12 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
if (af->is_any(&addr))
memcpy(&addr, &asconf->source, sizeof(addr));
+ if (security_sctp_bind_connect(asoc->ep->base.sk,
+ SCTP_PARAM_ADD_IP,
+ (struct sockaddr *)&addr,
+ af->sockaddr_len))
+ return SCTP_ERROR_REQ_REFUSED;
+
/* ADDIP 4.3 D9) If an endpoint receives an ADD IP address
* request and does not have the local resources to add this
* new address to the association, it MUST return an Error
@@ -3137,6 +3170,12 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
if (af->is_any(&addr))
memcpy(&addr.v4, sctp_source(asconf), sizeof(addr));
+ if (security_sctp_bind_connect(asoc->ep->base.sk,
+ SCTP_PARAM_SET_PRIMARY,
+ (struct sockaddr *)&addr,
+ af->sockaddr_len))
+ return SCTP_ERROR_REQ_REFUSED;
+
peer = sctp_assoc_lookup_paddr(asoc, &addr);
if (!peer)
return SCTP_ERROR_DNS_FAILED;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b71e7fb0a20a..298112ca8c06 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
+static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands,
+ struct sctp_association *asoc)
+{
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+}
+
/* Helper function to generate an adaptation indication event */
static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
struct sctp_association *asoc)
@@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
case SCTP_CMD_ADAPTATION_IND:
sctp_cmd_adaptation_ind(commands, asoc);
break;
+ case SCTP_CMD_PEER_NO_AUTH:
+ sctp_cmd_peer_no_auth(commands, asoc);
+ break;
case SCTP_CMD_ASSOC_SHKEY:
error = sctp_auth_asoc_init_active_key(asoc,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index eb7905ffe5f2..dd0594a10961 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -321,6 +321,11 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
struct sctp_packet *packet;
int len;
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+ chunk->skb))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* 6.10 Bundling
* An endpoint MUST NOT bundle INIT, INIT ACK or
* SHUTDOWN COMPLETE with any other chunks.
@@ -659,7 +664,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
void *arg,
struct sctp_cmd_seq *commands)
{
- struct sctp_ulpevent *ev, *ai_ev = NULL;
+ struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL;
struct sctp_association *new_asoc;
struct sctp_init_chunk *peer_init;
struct sctp_chunk *chunk = arg;
@@ -820,6 +825,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
goto nomem_aiev;
}
+ if (!new_asoc->peer.auth_capable) {
+ auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0,
+ SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!auth_ev)
+ goto nomem_authev;
+ }
+
/* Add all the state machine commands now since we've created
* everything. This way we don't introduce memory corruptions
* during side-effect processing and correclty count established
@@ -847,8 +860,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ai_ev));
+ if (auth_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(auth_ev));
+
return SCTP_DISPOSITION_CONSUME;
+nomem_authev:
+ sctp_ulpevent_free(ai_ev);
nomem_aiev:
sctp_ulpevent_free(ev);
nomem_ev:
@@ -908,6 +927,9 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
*/
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
+ /* Set peer label for connection. */
+ security_inet_conn_established(ep->base.sk, chunk->skb);
+
/* RFC 2960 5.1 Normal Establishment of an Association
*
* E) Upon reception of the COOKIE ACK, endpoint "A" will move
@@ -953,6 +975,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
SCTP_ULPEVENT(ev));
}
+ if (!asoc->peer.auth_capable) {
+ ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!ev)
+ goto nomem;
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(ev));
+ }
+
return SCTP_DISPOSITION_CONSUME;
nomem:
return SCTP_DISPOSITION_NOMEM;
@@ -1436,6 +1467,11 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
struct sctp_packet *packet;
int len;
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+ chunk->skb))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* 6.10 Bundling
* An endpoint MUST NOT bundle INIT, INIT ACK or
* SHUTDOWN COMPLETE with any other chunks.
@@ -1908,6 +1944,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
if (asoc->peer.adaptation_ind)
sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL());
+ if (!asoc->peer.auth_capable)
+ sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL());
+
return SCTP_DISPOSITION_CONSUME;
nomem:
@@ -1954,7 +1993,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
- struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
+ struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL;
struct sctp_chunk *repl;
/* Clarification from Implementor's Guide:
@@ -2001,6 +2040,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
goto nomem;
}
+
+ if (!asoc->peer.auth_capable) {
+ auth_ev = sctp_ulpevent_make_authkey(asoc, 0,
+ SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!auth_ev)
+ goto nomem;
+ }
}
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -2015,10 +2062,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
if (ai_ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ai_ev));
+ if (auth_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(auth_ev));
return SCTP_DISPOSITION_CONSUME;
nomem:
+ if (auth_ev)
+ sctp_ulpevent_free(auth_ev);
if (ai_ev)
sctp_ulpevent_free(ai_ev);
if (ev)
@@ -2106,6 +2158,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
}
}
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+ chunk->skb))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Set temp so that it won't be added into hashtable */
new_asoc->temp = 1;
@@ -4114,6 +4171,7 @@ static enum sctp_ierror sctp_sf_authenticate(
const union sctp_subtype type,
struct sctp_chunk *chunk)
{
+ struct sctp_shared_key *sh_key = NULL;
struct sctp_authhdr *auth_hdr;
__u8 *save_digest, *digest;
struct sctp_hmac *hmac;
@@ -4135,9 +4193,11 @@ static enum sctp_ierror sctp_sf_authenticate(
* configured
*/
key_id = ntohs(auth_hdr->shkey_id);
- if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id))
- return SCTP_IERROR_AUTH_BAD_KEYID;
-
+ if (key_id != asoc->active_key_id) {
+ sh_key = sctp_auth_get_shkey(asoc, key_id);
+ if (!sh_key)
+ return SCTP_IERROR_AUTH_BAD_KEYID;
+ }
/* Make sure that the length of the signature matches what
* we expect.
@@ -4166,7 +4226,7 @@ static enum sctp_ierror sctp_sf_authenticate(
sctp_auth_calculate_hmac(asoc, chunk->skb,
(struct sctp_auth_chunk *)chunk->chunk_hdr,
- GFP_ATOMIC);
+ sh_key, GFP_ATOMIC);
/* Discard the packet if the digests do not match */
if (memcmp(save_digest, digest, sig_len)) {
@@ -4243,7 +4303,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
struct sctp_ulpevent *ev;
ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id),
- SCTP_AUTH_NEWKEY, GFP_ATOMIC);
+ SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
if (!ev)
return -ENOMEM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bf271f8c2dc9..80835ac26d2c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
/* The sndbuf space is tracked per association. */
sctp_association_hold(asoc);
+ if (chunk->shkey)
+ sctp_auth_shkey_hold(chunk->shkey);
+
skb_set_owner_w(chunk->skb, sk);
chunk->skb->destructor = sctp_wfree;
@@ -354,11 +357,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
if (!opt->pf->af_supported(addr->sa.sa_family, opt))
return NULL;
- /* V4 mapped address are really of AF_INET family */
- if (addr->sa.sa_family == AF_INET6 &&
- ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
- !opt->pf->af_supported(AF_INET, opt))
- return NULL;
+ if (addr->sa.sa_family == AF_INET6) {
+ if (len < SIN6_LEN_RFC2133)
+ return NULL;
+ /* V4 mapped address are really of AF_INET family */
+ if (ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+ !opt->pf->af_supported(AF_INET, opt))
+ return NULL;
+ }
/* If we get this far, af is valid. */
af = sctp_get_af_specific(addr->sa.sa_family);
@@ -1043,6 +1049,12 @@ static int sctp_setsockopt_bindx(struct sock *sk,
/* Do the work. */
switch (op) {
case SCTP_BINDX_ADD_ADDR:
+ /* Allow security module to validate bindx addresses. */
+ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_BINDX_ADD,
+ (struct sockaddr *)kaddrs,
+ addrs_size);
+ if (err)
+ goto out;
err = sctp_bindx_add(sk, kaddrs, addrcnt);
if (err)
goto out;
@@ -1252,6 +1264,7 @@ static int __sctp_connect(struct sock *sk,
if (assoc_id)
*assoc_id = asoc->assoc_id;
+
err = sctp_wait_for_connect(asoc, &timeo);
/* Note: the asoc may be freed after the return of
* sctp_wait_for_connect.
@@ -1347,7 +1360,16 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (unlikely(IS_ERR(kaddrs)))
return PTR_ERR(kaddrs);
+ /* Allow security module to validate connectx addresses. */
+ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_CONNECTX,
+ (struct sockaddr *)kaddrs,
+ addrs_size);
+ if (err)
+ goto out_free;
+
err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
+
+out_free:
kvfree(kaddrs);
return err;
@@ -1606,396 +1628,319 @@ static int sctp_error(struct sock *sk, int flags, int err)
static int sctp_msghdr_parse(const struct msghdr *msg,
struct sctp_cmsgs *cmsgs);
-static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs,
+ struct sctp_sndrcvinfo *srinfo,
+ const struct msghdr *msg, size_t msg_len)
{
- struct net *net = sock_net(sk);
- struct sctp_sock *sp;
- struct sctp_endpoint *ep;
- struct sctp_association *new_asoc = NULL, *asoc = NULL;
- struct sctp_transport *transport, *chunk_tp;
- struct sctp_chunk *chunk;
- union sctp_addr to;
- struct sockaddr *msg_name = NULL;
- struct sctp_sndrcvinfo default_sinfo;
- struct sctp_sndrcvinfo *sinfo;
- struct sctp_initmsg *sinit;
- sctp_assoc_t associd = 0;
- struct sctp_cmsgs cmsgs = { NULL };
- enum sctp_scope scope;
- bool fill_sinfo_ttl = false, wait_connect = false;
- struct sctp_datamsg *datamsg;
- int msg_flags = msg->msg_flags;
- __u16 sinfo_flags = 0;
- long timeo;
+ __u16 sflags;
int err;
- err = 0;
- sp = sctp_sk(sk);
- ep = sp->ep;
-
- pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk,
- msg, msg_len, ep);
+ if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP))
+ return -EPIPE;
- /* We cannot send a message over a TCP-style listening socket. */
- if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
- err = -EPIPE;
- goto out_nounlock;
- }
+ if (msg_len > sk->sk_sndbuf)
+ return -EMSGSIZE;
- /* Parse out the SCTP CMSGs. */
- err = sctp_msghdr_parse(msg, &cmsgs);
+ memset(cmsgs, 0, sizeof(*cmsgs));
+ err = sctp_msghdr_parse(msg, cmsgs);
if (err) {
pr_debug("%s: msghdr parse err:%x\n", __func__, err);
- goto out_nounlock;
+ return err;
}
- /* Fetch the destination address for this packet. This
- * address only selects the association--it is not necessarily
- * the address we will send to.
- * For a peeled-off socket, msg_name is ignored.
- */
- if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
- int msg_namelen = msg->msg_namelen;
+ memset(srinfo, 0, sizeof(*srinfo));
+ if (cmsgs->srinfo) {
+ srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream;
+ srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags;
+ srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid;
+ srinfo->sinfo_context = cmsgs->srinfo->sinfo_context;
+ srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id;
+ srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive;
+ }
- err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
- msg_namelen);
- if (err)
- return err;
+ if (cmsgs->sinfo) {
+ srinfo->sinfo_stream = cmsgs->sinfo->snd_sid;
+ srinfo->sinfo_flags = cmsgs->sinfo->snd_flags;
+ srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid;
+ srinfo->sinfo_context = cmsgs->sinfo->snd_context;
+ srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id;
+ }
- if (msg_namelen > sizeof(to))
- msg_namelen = sizeof(to);
- memcpy(&to, msg->msg_name, msg_namelen);
- msg_name = msg->msg_name;
+ if (cmsgs->prinfo) {
+ srinfo->sinfo_timetolive = cmsgs->prinfo->pr_value;
+ SCTP_PR_SET_POLICY(srinfo->sinfo_flags,
+ cmsgs->prinfo->pr_policy);
}
- sinit = cmsgs.init;
- if (cmsgs.sinfo != NULL) {
- memset(&default_sinfo, 0, sizeof(default_sinfo));
- default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
- default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
- default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
- default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
- default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
+ sflags = srinfo->sinfo_flags;
+ if (!sflags && msg_len)
+ return 0;
- sinfo = &default_sinfo;
- fill_sinfo_ttl = true;
- } else {
- sinfo = cmsgs.srinfo;
- }
- /* Did the user specify SNDINFO/SNDRCVINFO? */
- if (sinfo) {
- sinfo_flags = sinfo->sinfo_flags;
- associd = sinfo->sinfo_assoc_id;
- }
+ if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT)))
+ return -EINVAL;
- pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__,
- msg_len, sinfo_flags);
+ if (((sflags & SCTP_EOF) && msg_len > 0) ||
+ (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0))
+ return -EINVAL;
- /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
- if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
- err = -EINVAL;
- goto out_nounlock;
- }
+ if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name)
+ return -EINVAL;
- /* If SCTP_EOF is set, no data can be sent. Disallow sending zero
- * length messages when SCTP_EOF|SCTP_ABORT is not set.
- * If SCTP_ABORT is set, the message length could be non zero with
- * the msg_iov set to the user abort reason.
- */
- if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
- (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
- err = -EINVAL;
- goto out_nounlock;
- }
+ return 0;
+}
- /* If SCTP_ADDR_OVER is set, there must be an address
- * specified in msg_name.
- */
- if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
- err = -EINVAL;
- goto out_nounlock;
- }
+static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
+ struct sctp_cmsgs *cmsgs,
+ union sctp_addr *daddr,
+ struct sctp_transport **tp)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct net *net = sock_net(sk);
+ struct sctp_association *asoc;
+ enum sctp_scope scope;
+ struct cmsghdr *cmsg;
+ struct sctp_af *af;
+ int err;
- transport = NULL;
+ *tp = NULL;
- pr_debug("%s: about to look up association\n", __func__);
+ if (sflags & (SCTP_EOF | SCTP_ABORT))
+ return -EINVAL;
- lock_sock(sk);
+ if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) ||
+ sctp_sstate(sk, CLOSING)))
+ return -EADDRNOTAVAIL;
- /* If a msg_name has been specified, assume this is to be used. */
- if (msg_name) {
- /* Look for a matching association on the endpoint. */
- asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+ if (sctp_endpoint_is_peeled_off(ep, daddr))
+ return -EADDRNOTAVAIL;
- /* If we could not find a matching association on the
- * endpoint, make sure that it is not a TCP-style
- * socket that already has an association or there is
- * no peeled-off association on another socket.
- */
- if (!asoc &&
- ((sctp_style(sk, TCP) &&
- (sctp_sstate(sk, ESTABLISHED) ||
- sctp_sstate(sk, CLOSING))) ||
- sctp_endpoint_is_peeled_off(ep, &to))) {
- err = -EADDRNOTAVAIL;
- goto out_unlock;
- }
+ if (!ep->base.bind_addr.port) {
+ if (sctp_autobind(sk))
+ return -EAGAIN;
} else {
- asoc = sctp_id2assoc(sk, associd);
- if (!asoc) {
- err = -EPIPE;
- goto out_unlock;
- }
+ if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+ return -EACCES;
}
- if (asoc) {
- pr_debug("%s: just looked up association:%p\n", __func__, asoc);
+ scope = sctp_scope(daddr);
- /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
- * socket that has an association in CLOSED state. This can
- * happen when an accepted socket has an association that is
- * already CLOSED.
- */
- if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
- err = -EPIPE;
- goto out_unlock;
- }
+ /* Label connection socket for first association 1-to-many
+ * style for client sequence socket()->sendmsg(). This
+ * needs to be done before sctp_assoc_add_peer() as that will
+ * set up the initial packet that needs to account for any
+ * security ip options (CIPSO/CALIPSO) added to the packet.
+ */
+ af = sctp_get_af_specific(daddr->sa.sa_family);
+ if (!af)
+ return -EINVAL;
+ err = security_sctp_bind_connect(sk, SCTP_SENDMSG_CONNECT,
+ (struct sockaddr *)daddr,
+ af->sockaddr_len);
+ if (err < 0)
+ return err;
- if (sinfo_flags & SCTP_EOF) {
- pr_debug("%s: shutting down association:%p\n",
- __func__, asoc);
+ asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+ if (!asoc)
+ return -ENOMEM;
- sctp_primitive_SHUTDOWN(net, asoc, NULL);
- err = 0;
- goto out_unlock;
- }
- if (sinfo_flags & SCTP_ABORT) {
+ if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
+ err = -ENOMEM;
+ goto free;
+ }
- chunk = sctp_make_abort_user(asoc, msg, msg_len);
- if (!chunk) {
- err = -ENOMEM;
- goto out_unlock;
- }
+ if (cmsgs->init) {
+ struct sctp_initmsg *init = cmsgs->init;
- pr_debug("%s: aborting association:%p\n",
- __func__, asoc);
+ if (init->sinit_num_ostreams) {
+ __u16 outcnt = init->sinit_num_ostreams;
- sctp_primitive_ABORT(net, asoc, chunk);
- err = 0;
- goto out_unlock;
+ asoc->c.sinit_num_ostreams = outcnt;
+ /* outcnt has been changed, need to re-init stream */
+ err = sctp_stream_init(&asoc->stream, outcnt, 0,
+ GFP_KERNEL);
+ if (err)
+ goto free;
}
+
+ if (init->sinit_max_instreams)
+ asoc->c.sinit_max_instreams = init->sinit_max_instreams;
+
+ if (init->sinit_max_attempts)
+ asoc->max_init_attempts = init->sinit_max_attempts;
+
+ if (init->sinit_max_init_timeo)
+ asoc->max_init_timeo =
+ msecs_to_jiffies(init->sinit_max_init_timeo);
}
- /* Do we need to create the association? */
- if (!asoc) {
- pr_debug("%s: there is no association yet\n", __func__);
+ *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+ if (!*tp) {
+ err = -ENOMEM;
+ goto free;
+ }
- if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
- err = -EINVAL;
- goto out_unlock;
- }
+ if (!cmsgs->addrs_msg)
+ return 0;
- /* Check for invalid stream against the stream counts,
- * either the default or the user specified stream counts.
- */
- if (sinfo) {
- if (!sinit || !sinit->sinit_num_ostreams) {
- /* Check against the defaults. */
- if (sinfo->sinfo_stream >=
- sp->initmsg.sinit_num_ostreams) {
- err = -EINVAL;
- goto out_unlock;
- }
- } else {
- /* Check against the requested. */
- if (sinfo->sinfo_stream >=
- sinit->sinit_num_ostreams) {
- err = -EINVAL;
- goto out_unlock;
- }
- }
- }
+ /* sendv addr list parse */
+ for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
+ struct sctp_transport *transport;
+ struct sctp_association *old;
+ union sctp_addr _daddr;
+ int dlen;
- /*
- * API 3.1.2 bind() - UDP Style Syntax
- * If a bind() or sctp_bindx() is not called prior to a
- * sendmsg() call that initiates a new association, the
- * system picks an ephemeral port and will choose an address
- * set equivalent to binding with a wildcard address.
- */
- if (!ep->base.bind_addr.port) {
- if (sctp_autobind(sk)) {
- err = -EAGAIN;
- goto out_unlock;
+ if (cmsg->cmsg_level != IPPROTO_SCTP ||
+ (cmsg->cmsg_type != SCTP_DSTADDRV4 &&
+ cmsg->cmsg_type != SCTP_DSTADDRV6))
+ continue;
+
+ daddr = &_daddr;
+ memset(daddr, 0, sizeof(*daddr));
+ dlen = cmsg->cmsg_len - sizeof(struct cmsghdr);
+ if (cmsg->cmsg_type == SCTP_DSTADDRV4) {
+ if (dlen < sizeof(struct in_addr)) {
+ err = -EINVAL;
+ goto free;
}
+
+ dlen = sizeof(struct in_addr);
+ daddr->v4.sin_family = AF_INET;
+ daddr->v4.sin_port = htons(asoc->peer.port);
+ memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen);
} else {
- /*
- * If an unprivileged user inherits a one-to-many
- * style socket with open associations on a privileged
- * port, it MAY be permitted to accept new associations,
- * but it SHOULD NOT be permitted to open new
- * associations.
- */
- if (ep->base.bind_addr.port < inet_prot_sock(net) &&
- !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
- err = -EACCES;
- goto out_unlock;
+ if (dlen < sizeof(struct in6_addr)) {
+ err = -EINVAL;
+ goto free;
}
- }
- scope = sctp_scope(&to);
- new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
- if (!new_asoc) {
- err = -ENOMEM;
- goto out_unlock;
+ dlen = sizeof(struct in6_addr);
+ daddr->v6.sin6_family = AF_INET6;
+ daddr->v6.sin6_port = htons(asoc->peer.port);
+ memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
}
- asoc = new_asoc;
- err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
- if (err < 0) {
- err = -ENOMEM;
- goto out_free;
+ err = sctp_verify_addr(sk, daddr, sizeof(*daddr));
+ if (err)
+ goto free;
+
+ old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+ if (old && old != asoc) {
+ if (old->state >= SCTP_STATE_ESTABLISHED)
+ err = -EISCONN;
+ else
+ err = -EALREADY;
+ goto free;
}
- /* If the SCTP_INIT ancillary data is specified, set all
- * the association init values accordingly.
- */
- if (sinit) {
- if (sinit->sinit_num_ostreams) {
- __u16 outcnt = sinit->sinit_num_ostreams;
-
- asoc->c.sinit_num_ostreams = outcnt;
- /* outcnt has been changed, so re-init stream */
- err = sctp_stream_init(&asoc->stream, outcnt, 0,
- GFP_KERNEL);
- if (err)
- goto out_free;
- }
- if (sinit->sinit_max_instreams) {
- asoc->c.sinit_max_instreams =
- sinit->sinit_max_instreams;
- }
- if (sinit->sinit_max_attempts) {
- asoc->max_init_attempts
- = sinit->sinit_max_attempts;
- }
- if (sinit->sinit_max_init_timeo) {
- asoc->max_init_timeo =
- msecs_to_jiffies(sinit->sinit_max_init_timeo);
- }
+ if (sctp_endpoint_is_peeled_off(ep, daddr)) {
+ err = -EADDRNOTAVAIL;
+ goto free;
}
- /* Prime the peer's transport structures. */
- transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
+ transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
+ SCTP_UNKNOWN);
if (!transport) {
err = -ENOMEM;
- goto out_free;
+ goto free;
}
}
- /* ASSERT: we have a valid association at this point. */
- pr_debug("%s: we have a valid association\n", __func__);
+ return 0;
- if (!sinfo) {
- /* If the user didn't specify SNDINFO/SNDRCVINFO, make up
- * one with some defaults.
- */
- memset(&default_sinfo, 0, sizeof(default_sinfo));
- default_sinfo.sinfo_stream = asoc->default_stream;
- default_sinfo.sinfo_flags = asoc->default_flags;
- default_sinfo.sinfo_ppid = asoc->default_ppid;
- default_sinfo.sinfo_context = asoc->default_context;
- default_sinfo.sinfo_timetolive = asoc->default_timetolive;
- default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
-
- sinfo = &default_sinfo;
- } else if (fill_sinfo_ttl) {
- /* In case SNDINFO was specified, we still need to fill
- * it with a default ttl from the assoc here.
- */
- sinfo->sinfo_timetolive = asoc->default_timetolive;
- }
+free:
+ sctp_association_free(asoc);
+ return err;
+}
- /* API 7.1.7, the sndbuf size per association bounds the
- * maximum size of data that can be sent in a single send call.
- */
- if (msg_len > sk->sk_sndbuf) {
- err = -EMSGSIZE;
- goto out_free;
+static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
+ __u16 sflags, struct msghdr *msg,
+ size_t msg_len)
+{
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+
+ if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP))
+ return -EPIPE;
+
+ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP) &&
+ !sctp_state(asoc, ESTABLISHED))
+ return 0;
+
+ if (sflags & SCTP_EOF) {
+ pr_debug("%s: shutting down association:%p\n", __func__, asoc);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
+
+ return 0;
}
- if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(asoc);
+ if (sflags & SCTP_ABORT) {
+ struct sctp_chunk *chunk;
- /* If fragmentation is disabled and the message length exceeds the
- * association fragmentation point, return EMSGSIZE. The I-D
- * does not specify what this error is, but this looks like
- * a great fit.
- */
- if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
- err = -EMSGSIZE;
- goto out_free;
+ chunk = sctp_make_abort_user(asoc, msg, msg_len);
+ if (!chunk)
+ return -ENOMEM;
+
+ pr_debug("%s: aborting association:%p\n", __func__, asoc);
+ sctp_primitive_ABORT(net, asoc, chunk);
+
+ return 0;
}
- /* Check for invalid stream. */
+ return 1;
+}
+
+static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
+ struct msghdr *msg, size_t msg_len,
+ struct sctp_transport *transport,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+ struct sctp_datamsg *datamsg;
+ bool wait_connect = false;
+ struct sctp_chunk *chunk;
+ long timeo;
+ int err;
+
if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
err = -EINVAL;
- goto out_free;
+ goto err;
}
- /* Allocate sctp_stream_out_ext if not already done */
if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
if (err)
- goto out_free;
+ goto err;
+ }
+
+ if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+ err = -EMSGSIZE;
+ goto err;
}
+ if (asoc->pmtu_pending)
+ sctp_assoc_pending_pmtu(asoc);
+
if (sctp_wspace(asoc) < msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
- timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
- /* sk can be changed by peel off when waiting for buf. */
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err) {
- if (err == -ESRCH) {
- /* asoc is already dead. */
- new_asoc = NULL;
- err = -EPIPE;
- }
- goto out_free;
- }
+ if (err)
+ goto err;
}
- /* If an address is passed with the sendto/sendmsg call, it is used
- * to override the primary destination address in the TCP model, or
- * when SCTP_ADDR_OVER flag is set in the UDP model.
- */
- if ((sctp_style(sk, TCP) && msg_name) ||
- (sinfo_flags & SCTP_ADDR_OVER)) {
- chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
- if (!chunk_tp) {
- err = -EINVAL;
- goto out_free;
- }
- } else
- chunk_tp = NULL;
-
- /* Auto-connect, if we aren't connected already. */
if (sctp_state(asoc, CLOSED)) {
err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
- if (err < 0)
- goto out_free;
+ if (err)
+ goto err;
- /* If stream interleave is enabled, wait_connect has to be
- * done earlier than data enqueue, as it needs to make data
- * or idata according to asoc->intl_enable which is set
- * after connection is done.
- */
- if (sctp_sk(asoc->base.sk)->strm_interleave) {
+ if (sctp_sk(sk)->strm_interleave) {
timeo = sock_sndtimeo(sk, 0);
err = sctp_wait_for_connect(asoc, &timeo);
if (err)
- goto out_unlock;
+ goto err;
} else {
wait_connect = true;
}
@@ -2003,73 +1948,186 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
pr_debug("%s: we associated primitively\n", __func__);
}
- /* Break the message into multiple chunks of maximum size. */
datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
if (IS_ERR(datamsg)) {
err = PTR_ERR(datamsg);
- goto out_free;
+ goto err;
}
+
asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
- /* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
sctp_chunk_hold(chunk);
-
- /* Do accounting for the write space. */
sctp_set_owner_w(chunk);
-
- chunk->transport = chunk_tp;
+ chunk->transport = transport;
}
- /* Send it to the lower layers. Note: all chunks
- * must either fail or succeed. The lower layer
- * works that way today. Keep it that way or this
- * breaks.
- */
err = sctp_primitive_SEND(net, asoc, datamsg);
- /* Did the lower layer accept the chunk? */
if (err) {
sctp_datamsg_free(datamsg);
- goto out_free;
+ goto err;
}
pr_debug("%s: we sent primitively\n", __func__);
sctp_datamsg_put(datamsg);
- err = msg_len;
if (unlikely(wait_connect)) {
- timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
sctp_wait_for_connect(asoc, &timeo);
}
- /* If we are already past ASSOCIATE, the lower
- * layers are responsible for association cleanup.
- */
- goto out_unlock;
+ err = msg_len;
-out_free:
- if (new_asoc)
- sctp_association_free(asoc);
-out_unlock:
- release_sock(sk);
+err:
+ return err;
+}
-out_nounlock:
- return sctp_error(sk, msg_flags, err);
+static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
+ const struct msghdr *msg,
+ struct sctp_cmsgs *cmsgs)
+{
+ union sctp_addr *daddr = NULL;
+ int err;
-#if 0
-do_sock_err:
- if (msg_len)
- err = msg_len;
- else
- err = sock_error(sk);
- goto out;
+ if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
+ int len = msg->msg_namelen;
-do_interrupted:
- if (msg_len)
- err = msg_len;
- goto out;
-#endif /* 0 */
+ if (len > sizeof(*daddr))
+ len = sizeof(*daddr);
+
+ daddr = (union sctp_addr *)msg->msg_name;
+
+ err = sctp_verify_addr(sk, daddr, len);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return daddr;
+}
+
+static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct sctp_cmsgs *cmsgs)
+{
+ if (!cmsgs->srinfo && !cmsgs->sinfo) {
+ sinfo->sinfo_stream = asoc->default_stream;
+ sinfo->sinfo_ppid = asoc->default_ppid;
+ sinfo->sinfo_context = asoc->default_context;
+ sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+
+ if (!cmsgs->prinfo)
+ sinfo->sinfo_flags = asoc->default_flags;
+ }
+
+ if (!cmsgs->srinfo && !cmsgs->prinfo)
+ sinfo->sinfo_timetolive = asoc->default_timetolive;
+
+ if (cmsgs->authinfo) {
+ /* Reuse sinfo_tsn to indicate that authinfo was set and
+ * sinfo_ssn to save the keyid on tx path.
+ */
+ sinfo->sinfo_tsn = 1;
+ sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber;
+ }
+}
+
+static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct sctp_transport *transport = NULL;
+ struct sctp_sndrcvinfo _sinfo, *sinfo;
+ struct sctp_association *asoc;
+ struct sctp_cmsgs cmsgs;
+ union sctp_addr *daddr;
+ bool new = false;
+ __u16 sflags;
+ int err;
+
+ /* Parse and get snd_info */
+ err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len);
+ if (err)
+ goto out;
+
+ sinfo = &_sinfo;
+ sflags = sinfo->sinfo_flags;
+
+ /* Get daddr from msg */
+ daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
+ if (IS_ERR(daddr)) {
+ err = PTR_ERR(daddr);
+ goto out;
+ }
+
+ lock_sock(sk);
+
+ /* SCTP_SENDALL process */
+ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
+ list_for_each_entry(asoc, &ep->asocs, asocs) {
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+ msg_len);
+ if (err == 0)
+ continue;
+ if (err < 0)
+ goto out_unlock;
+
+ sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+ err = sctp_sendmsg_to_asoc(asoc, msg, msg_len,
+ NULL, sinfo);
+ if (err < 0)
+ goto out_unlock;
+
+ iov_iter_revert(&msg->msg_iter, err);
+ }
+
+ goto out_unlock;
+ }
+
+ /* Get and check or create asoc */
+ if (daddr) {
+ asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+ if (asoc) {
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+ msg_len);
+ if (err <= 0)
+ goto out_unlock;
+ } else {
+ err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
+ &transport);
+ if (err)
+ goto out_unlock;
+
+ asoc = transport->asoc;
+ new = true;
+ }
+
+ if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
+ transport = NULL;
+ } else {
+ asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id);
+ if (!asoc) {
+ err = -EPIPE;
+ goto out_unlock;
+ }
+
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len);
+ if (err <= 0)
+ goto out_unlock;
+ }
+
+ /* Update snd_info with the asoc */
+ sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+ /* Send msg to the asoc */
+ err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo);
+ if (err < 0 && err != -ESRCH && new)
+ sctp_association_free(asoc);
+
+out_unlock:
+ release_sock(sk);
+out:
+ return sctp_error(sk, msg->msg_flags, err);
}
/* This is an extended version of skb_pull() that removes the data from the
@@ -2909,6 +2967,8 @@ static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval,
{
struct sctp_prim prim;
struct sctp_transport *trans;
+ struct sctp_af *af;
+ int err;
if (optlen != sizeof(struct sctp_prim))
return -EINVAL;
@@ -2916,6 +2976,17 @@ static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval,
if (copy_from_user(&prim, optval, sizeof(struct sctp_prim)))
return -EFAULT;
+ /* Allow security module to validate address but need address len. */
+ af = sctp_get_af_specific(prim.ssp_addr.ss_family);
+ if (!af)
+ return -EINVAL;
+
+ err = security_sctp_bind_connect(sk, SCTP_PRIMARY_ADDR,
+ (struct sockaddr *)&prim.ssp_addr,
+ af->sockaddr_len);
+ if (err)
+ return err;
+
trans = sctp_addr_id2transport(sk, &prim.ssp_addr, prim.ssp_assoc_id);
if (!trans)
return -EINVAL;
@@ -3138,6 +3209,7 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_af *af = sp->pf->af;
struct sctp_assoc_value params;
struct sctp_association *asoc;
int val;
@@ -3162,7 +3234,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
if (val) {
int min_len, max_len;
- min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
+ min_len = SCTP_DEFAULT_MINSEGMENT - af->net_header_len;
+ min_len -= af->ip_options_len(sk);
min_len -= sizeof(struct sctphdr) +
sizeof(struct sctp_data_chunk);
@@ -3175,7 +3248,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
asoc = sctp_id2assoc(sk, params.assoc_id);
if (asoc) {
if (val == 0) {
- val = asoc->pathmtu - sp->pf->af->net_header_len;
+ val = asoc->pathmtu - af->net_header_len;
+ val -= af->ip_options_len(sk);
val -= sizeof(struct sctphdr) +
sctp_datachk_len(&asoc->stream);
}
@@ -3244,6 +3318,13 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr))
return -EADDRNOTAVAIL;
+ /* Allow security module to validate address. */
+ err = security_sctp_bind_connect(sk, SCTP_SET_PEER_PRIMARY_ADDR,
+ (struct sockaddr *)&prim.sspp_addr,
+ af->sockaddr_len);
+ if (err)
+ return err;
+
/* Create an ASCONF chunk with SET_PRIMARY parameter */
chunk = sctp_make_asconf_set_prim(asoc,
(union sctp_addr *)&prim.sspp_addr);
@@ -3624,6 +3705,33 @@ static int sctp_setsockopt_del_key(struct sock *sk,
}
/*
+ * 8.3.4 Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY)
+ *
+ * This set option will deactivate a shared secret key.
+ */
+static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct sctp_authkeyid val;
+ struct sctp_association *asoc;
+
+ if (!ep->auth_enable)
+ return -EACCES;
+
+ if (optlen != sizeof(struct sctp_authkeyid))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, optlen))
+ return -EFAULT;
+
+ asoc = sctp_id2assoc(sk, val.scact_assoc_id);
+ if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
+
+ return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+}
+
+/*
* 8.1.23 SCTP_AUTO_ASCONF
*
* This option will enable or disable the use of the automatic generation of
@@ -4215,6 +4323,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_AUTH_DELETE_KEY:
retval = sctp_setsockopt_del_key(sk, optval, optlen);
break;
+ case SCTP_AUTH_DEACTIVATE_KEY:
+ retval = sctp_setsockopt_deactivate_key(sk, optval, optlen);
+ break;
case SCTP_AUTO_ASCONF:
retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
break;
@@ -5087,9 +5198,11 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
sctp_copy_sock(sock->sk, sk, asoc);
/* Make peeled-off sockets more like 1-1 accepted sockets.
- * Set the daddr and initialize id to something more random
+ * Set the daddr and initialize id to something more random and also
+ * copy over any ip options.
*/
sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
+ sp->pf->copy_ip_options(sk, sock->sk);
/* Populate the fields of the newsk from the oldsk and migrate the
* asoc to the newsk.
@@ -7189,6 +7302,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_AUTH_KEY:
case SCTP_AUTH_CHUNK:
case SCTP_AUTH_DELETE_KEY:
+ case SCTP_AUTH_DEACTIVATE_KEY:
retval = -EOPNOTSUPP;
break;
case SCTP_HMAC_IDENT:
@@ -7811,8 +7925,8 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
- SCTP_ABORT | SCTP_EOF))
+ SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+ SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -7835,10 +7949,60 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
- SCTP_ABORT | SCTP_EOF))
+ SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+ SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
+ case SCTP_PRINFO:
+ /* SCTP Socket API Extension
+ * 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo
+ */
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_prinfo)))
+ return -EINVAL;
+
+ cmsgs->prinfo = CMSG_DATA(cmsg);
+ if (cmsgs->prinfo->pr_policy & ~SCTP_PR_SCTP_MASK)
+ return -EINVAL;
+
+ if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE)
+ cmsgs->prinfo->pr_value = 0;
+ break;
+ case SCTP_AUTHINFO:
+ /* SCTP Socket API Extension
+ * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo
+ */
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo)))
+ return -EINVAL;
+
+ cmsgs->authinfo = CMSG_DATA(cmsg);
+ break;
+ case SCTP_DSTADDRV4:
+ case SCTP_DSTADDRV6:
+ /* SCTP Socket API Extension
+ * 5.3.9/10 SCTP Destination IPv4/6 Address Structure (SCTP_DSTADDRV4/6)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_DSTADDRV4 struct in_addr
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_DSTADDRV6 struct in6_addr
+ */
+ cmsgs->addrs_msg = my_msg;
+ break;
default:
return -EINVAL;
}
@@ -8062,6 +8226,26 @@ static void sctp_wfree(struct sk_buff *skb)
sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
+ if (chunk->shkey) {
+ struct sctp_shared_key *shkey = chunk->shkey;
+
+ /* refcnt == 2 and !list_empty mean after this release, it's
+ * not being used anywhere, and it's time to notify userland
+ * that this shkey can be freed if it's been deactivated.
+ */
+ if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+ refcount_read(&shkey->refcnt) == 2) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+ SCTP_AUTH_FREE_KEY,
+ GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+ sctp_auth_shkey_release(chunk->shkey);
+ }
+
sock_wfree(skb);
sctp_wake_up_waiters(sk, asoc);
@@ -8341,6 +8525,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
{
struct inet_sock *inet = inet_sk(sk);
struct inet_sock *newinet;
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_endpoint *ep = sp->ep;
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
@@ -8383,7 +8569,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
net_enable_timestamp();
- security_sk_clone(sk, newsk);
+ /* Set newsk security attributes from orginal sk and connection
+ * security attribute from ep.
+ */
+ security_sctp_sk_clone(ep, sk, newsk);
}
static inline void sctp_copy_descendant(struct sock *sk_to,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index da1a5cdefd13..5f8046c62d90 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -7,13 +7,11 @@
* applicable with RoCE-cards only
*
* Initial restrictions:
- * - non-blocking connect postponed
- * - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
* - support for urgent data postponed
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
* based on prototype from Frank Blaschka
@@ -24,7 +22,6 @@
#include <linux/module.h>
#include <linux/socket.h>
-#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
@@ -66,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
+static struct smc_hashinfo smc_v6_hashinfo = {
+ .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
+};
+
int smc_hash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
@@ -105,6 +106,18 @@ struct proto smc_proto = {
};
EXPORT_SYMBOL_GPL(smc_proto);
+struct proto smc_proto6 = {
+ .name = "SMC6",
+ .owner = THIS_MODULE,
+ .keepalive = smc_set_keepalive,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v6_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+EXPORT_SYMBOL_GPL(smc_proto6);
+
static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -161,19 +174,22 @@ static void smc_destruct(struct sock *sk)
sk_refcnt_debug_dec(sk);
}
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+ int protocol)
{
struct smc_sock *smc;
+ struct proto *prot;
struct sock *sk;
- sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
+ prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+ sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
if (!sk)
return NULL;
sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
- sk->sk_protocol = SMCPROTO_SMC;
+ sk->sk_protocol = protocol;
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
@@ -200,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
goto out;
rc = -EAFNOSUPPORT;
+ if (addr->sin_family != AF_INET &&
+ addr->sin_family != AF_INET6 &&
+ addr->sin_family != AF_UNSPEC)
+ goto out;
/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
- if ((addr->sin_family != AF_INET) &&
- ((addr->sin_family != AF_UNSPEC) ||
- (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
+ if (addr->sin_family == AF_UNSPEC &&
+ addr->sin_addr.s_addr != htonl(INADDR_ANY))
goto out;
lock_sock(sk);
@@ -273,47 +292,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
- __be32 *subnet, u8 *prefix_len)
-{
- struct dst_entry *dst = sk_dst_get(clcsock->sk);
- struct in_device *in_dev;
- struct sockaddr_in addr;
- int rc = -ENOENT;
- int len;
-
- if (!dst) {
- rc = -ENOTCONN;
- goto out;
- }
- if (!dst->dev) {
- rc = -ENODEV;
- goto out_rel;
- }
-
- /* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
- /* analyze IPv4 specific data of net_device belonging to TCP socket */
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dst->dev);
- for_ifa(in_dev) {
- if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
- continue;
- *prefix_len = inet_mask_len(ifa->ifa_mask);
- *subnet = ifa->ifa_address & ifa->ifa_mask;
- rc = 0;
- break;
- } endfor_ifa(in_dev);
- rcu_read_unlock();
-
-out_rel:
- dst_release(dst);
-out:
- return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
@@ -333,6 +312,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
return rc;
}
+ if (link->llc_confirm_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
rc = smc_ib_modify_qp_rts(link);
if (rc)
return SMC_CLC_DECL_INTERR;
@@ -347,11 +329,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
- gid, SMC_LLC_RESP);
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;
- return rc;
+ /* receive ADD LINK request from server over RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ /* send add link reject message, only one link supported for now */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -373,19 +377,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
- spin_lock_bh(&smc_lgr_list.lock);
- /* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
-}
-
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
- struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
@@ -439,8 +433,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
- ibport, &aclc.lcl, srv_first_contact);
+ local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+ srv_first_contact);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -499,8 +493,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(
- smc, &smcibdev->gid[ibport - 1]);
+ reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) {
rc = reason_code;
goto out_err_unlock;
@@ -557,9 +550,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
/* separate smc parameter checking to be safe */
if (alen < sizeof(addr->sa_family))
goto out_err;
- if (addr->sa_family != AF_INET)
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
goto out_err;
- smc->addr = addr; /* needed for nonblocking connect */
lock_sock(sk);
switch (sk->sk_state) {
@@ -600,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
int rc;
release_sock(lsk);
- new_sk = smc_sock_alloc(sock_net(lsk), NULL);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
if (!new_sk) {
rc = -ENOMEM;
lsk->sk_err = ENOMEM;
@@ -749,9 +741,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
+ return rc;
}
- return rc;
+ if (link->llc_confirm_resp_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
+ /* send ADD LINK request to client over the RoCE fabric */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ /* receive ADD LINK response from client over the RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
/* setup for RDMA connection of server */
@@ -767,13 +784,10 @@ static void smc_listen_work(struct work_struct *work)
struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
- struct sockaddr_in peeraddr;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
- int rc = 0, len;
- __be32 subnet;
- u8 prefix_len;
+ int rc = 0;
u8 ibport;
/* check if peer is smc capable */
@@ -808,28 +822,19 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
- /* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
- if (rc) {
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
- goto decline_rdma;
- }
-
pclc = (struct smc_clc_msg_proposal *)&buf;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
- if (pclc_prfx->outgoing_subnet != subnet ||
- pclc_prfx->prefix_len != prefix_len) {
+
+ rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
+ if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
- /* get address of the peer connected to the internal TCP socket */
- kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
-
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc->lcl, 0);
+ local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+ 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -978,10 +983,6 @@ out:
lsmc->clcsock = NULL;
}
release_sock(lsk);
- /* no more listening, wake up smc_close_wait_listen_clcsock and
- * accept
- */
- lsk->sk_state_change(lsk);
sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
@@ -1075,7 +1076,7 @@ out:
}
static int smc_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct smc_sock *smc;
@@ -1085,7 +1086,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
smc = smc_sk(sock->sk);
- return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
+ return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
@@ -1383,6 +1384,7 @@ static const struct proto_ops smc_sock_ops = {
static int smc_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
struct smc_sock *smc;
struct sock *sk;
int rc;
@@ -1392,22 +1394,24 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
goto out;
rc = -EPROTONOSUPPORT;
- if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
+ if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
goto out;
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
- sk = smc_sock_alloc(net, sock);
+ sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk(sk);
smc->use_fallback = false; /* assume rdma capability first */
- rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
- IPPROTO_TCP, &smc->clcsock);
- if (rc)
+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc->clcsock);
+ if (rc) {
sk_common_release(sk);
+ goto out;
+ }
smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
@@ -1443,16 +1447,23 @@ static int __init smc_init(void)
rc = proto_register(&smc_proto, 1);
if (rc) {
- pr_err("%s: proto_register fails with %d\n", __func__, rc);
+ pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
goto out_pnet;
}
+ rc = proto_register(&smc_proto6, 1);
+ if (rc) {
+ pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
+ goto out_proto;
+ }
+
rc = sock_register(&smc_sock_family_ops);
if (rc) {
pr_err("%s: sock_register fails with %d\n", __func__, rc);
- goto out_proto;
+ goto out_proto6;
}
INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
+ INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
rc = smc_ib_register_client();
if (rc) {
@@ -1465,6 +1476,8 @@ static int __init smc_init(void)
out_sock:
sock_unregister(PF_SMC);
+out_proto6:
+ proto_unregister(&smc_proto6);
out_proto:
proto_unregister(&smc_proto);
out_pnet:
@@ -1483,11 +1496,13 @@ static void __exit smc_exit(void)
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
+ cancel_delayed_work_sync(&lgr->free_work);
smc_lgr_free(lgr); /* free link group */
}
static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
+ proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 9518986c97b1..e4829a2f46ba 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -18,11 +18,13 @@
#include "smc_ib.h"
-#define SMCPROTO_SMC 0 /* SMC protocol */
+#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
+#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
#define SMC_MAX_PORTS 2 /* Max # of ports */
extern struct proto smc_proto;
+extern struct proto smc_proto6;
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
@@ -172,7 +174,6 @@ struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
- struct sockaddr *addr; /* inet connect address */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
@@ -263,10 +264,8 @@ static inline bool using_ipsec(struct smc_sock *smc)
struct smc_clc_msg_local;
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
- u8 *prefix_len);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 3cd086e5bd28..b42395d24cba 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -269,7 +269,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved))
return; /* short message */
- if (cdc->len != sizeof(*cdc))
+ if (cdc->len != SMC_WR_TX_SIZE)
return; /* invalid message */
smc_cdc_msg_recv(cdc, link, wc->wr_id);
}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8ac51583a063..3a988c22f627 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -5,15 +5,17 @@
* CLC (connection layer control) handshake over initial TCP socket to
* prepare for RDMA traffic
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
#include <linux/in.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
+#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -22,6 +24,9 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
@@ -70,6 +75,172 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}
+/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
+static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(ipv4, ifa))
+ continue;
+ prop->prefix_len = inet_mask_len(ifa->ifa_mask);
+ prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
+ /* prop->ipv6_prefixes_cnt = 0; already done by memset before */
+ return 0;
+ } endfor_ifa(in_dev);
+ return -ENOENT;
+}
+
+/* fill CLC proposal msg with ipv6 prefixes from device */
+static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
+ struct inet6_ifaddr *ifa;
+ int cnt = 0;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* use a maximum of 8 IPv6 prefixes from device */
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
+ &ifa->addr, ifa->prefix_len);
+ ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
+ cnt++;
+ if (cnt == SMC_CLC_MAX_V6_PREFIX)
+ break;
+ }
+ prop->ipv6_prefixes_cnt = cnt;
+ if (cnt)
+ return 0;
+#endif
+ return -ENOENT;
+}
+
+/* retrieve and set prefixes in CLC proposal msg */
+static int smc_clc_prfx_set(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct sockaddr_storage addrs;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr;
+ int rc = -ENOENT;
+
+ memset(prop, 0, sizeof(*prop));
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ /* get address to which the internal TCP socket is bound */
+ kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ /* analyze IP specific data of net_device belonging to TCP socket */
+ addr6 = (struct sockaddr_in6 *)&addrs;
+ rcu_read_lock();
+ if (addrs.ss_family == PF_INET) {
+ /* IPv4 */
+ addr = (struct sockaddr_in *)&addrs;
+ rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
+ } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
+ /* mapped IPv4 address - peer is IPv4 only */
+ rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
+ prop);
+ } else {
+ /* IPv6 */
+ rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
+ }
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
+/* match ipv4 addrs of dev against addr in CLC proposal */
+static int smc_clc_prfx_match4_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
+ inet_ifa_match(prop->outgoing_subnet, ifa))
+ return 0;
+ } endfor_ifa(in_dev);
+
+ return -ENOENT;
+}
+
+/* match ipv6 addrs of dev against addrs in CLC proposal */
+static int smc_clc_prfx_match6_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dev);
+ struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct inet6_ifaddr *ifa;
+ int i, max;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
+ ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
+ max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ for (i = 0; i < max; i++) {
+ if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
+ ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
+ ifa->prefix_len))
+ return 0;
+ }
+ }
+#endif
+ return -ENOENT;
+}
+
+/* check if proposed prefixes match one of our device prefixes */
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ int rc;
+
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ rcu_read_lock();
+ if (!prop->ipv6_prefixes_cnt)
+ rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
+ else
+ rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -133,7 +304,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
/* receive the complete CLC message */
memset(&msg, 0, sizeof(struct msghdr));
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
krflags = MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
len = sock_recvmsg(smc->clcsock, &msg, krflags);
@@ -189,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
+ struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
struct smc_clc_msg_trail trl;
+ int len, i, plen, rc;
int reason_code = 0;
- struct kvec vec[3];
+ struct kvec vec[4];
struct msghdr msg;
- int len, plen, rc;
+
+ /* retrieve ip prefixes for CLC proposal msg */
+ rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
+ if (rc)
+ return SMC_CLC_DECL_CNFERR; /* configuration error */
/* send SMC Proposal CLC message */
- plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
+ plen = sizeof(pclc) + sizeof(pclc_prfx) +
+ (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
+ sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
@@ -209,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
pclc.iparea_offset = htons(0);
- memset(&pclc_prfx, 0, sizeof(pclc_prfx));
- /* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
- &pclc_prfx.prefix_len);
- if (rc)
- return SMC_CLC_DECL_CNFERR; /* configuration error */
- pclc_prfx.ipv6_prefixes_cnt = 0;
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
- vec[0].iov_base = &pclc;
- vec[0].iov_len = sizeof(pclc);
- vec[1].iov_base = &pclc_prfx;
- vec[1].iov_len = sizeof(pclc_prfx);
- vec[2].iov_base = &trl;
- vec[2].iov_len = sizeof(trl);
+ i = 0;
+ vec[i].iov_base = &pclc;
+ vec[i++].iov_len = sizeof(pclc);
+ vec[i].iov_base = &pclc_prfx;
+ vec[i++].iov_len = sizeof(pclc_prfx);
+ if (pclc_prfx.ipv6_prefixes_cnt > 0) {
+ vec[i].iov_base = &ipv6_prfx[0];
+ vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
+ sizeof(ipv6_prfx[0]);
+ }
+ vec[i].iov_base = &trl;
+ vec[i++].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
- len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
+ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index c145a0f36a68..63bf1dc2c1f9 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -22,9 +22,6 @@
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
#define SMC_CLC_V1 0x1 /* SMC version */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
+#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
@@ -62,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
+#define SMC_CLC_MAX_V6_PREFIX 8
+
+/* Struct would be 4 byte aligned, but it is used in an array that is sent
+ * to peers and must conform to RFC7609, hence we need to use packed here.
+ */
struct smc_clc_ipv6_prefix {
- u8 prefix[4];
+ struct in6_addr prefix;
u8 prefix_len;
-} __packed;
+} __packed; /* format defined in RFC7609 */
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
@@ -81,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
} __aligned(4);
#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
-#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \
+ sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
SMC_CLC_PROPOSAL_MAX_OFFSET + \
+ sizeof(struct smc_clc_msg_proposal_prefix) + \
SMC_CLC_PROPOSAL_MAX_PREFIX + \
sizeof(struct smc_clc_msg_trail))
@@ -124,9 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index e339c0186dcf..fa41d9881741 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -30,27 +30,6 @@ static void smc_close_cleanup_listen(struct sock *parent)
smc_close_non_accepted(sk);
}
-static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct sock *sk = &smc->sk;
- signed long timeout;
-
- timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
- add_wait_queue(sk_sleep(sk), &wait);
- do {
- release_sock(sk);
- if (smc->clcsock)
- timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
- timeout);
- sched_annotate_sleep();
- lock_sock(sk);
- if (!smc->clcsock)
- break;
- } while (timeout);
- remove_wait_queue(sk_sleep(sk), &wait);
-}
-
/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
@@ -204,9 +183,11 @@ again:
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
- smc_close_wait_listen_clcsock(smc);
}
smc_close_cleanup_listen(sk);
+ release_sock(sk);
+ flush_work(&smc->tcp_listen_work);
+ lock_sock(sk);
break;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2424c7100aaf..f44f6803f7ff 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -32,6 +32,17 @@
static u32 smc_lgr_num; /* unique link group number */
+static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
+{
+ /* client link group creation always follows the server link group
+ * creation. For client use a somewhat higher removal delay time,
+ * otherwise there is a risk of out-of-sync link groups.
+ */
+ mod_delayed_work(system_wq, &lgr->free_work,
+ lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
+}
+
/* Register connection's alert token in our lookup structure.
* To use rbtrees we have to implement our own insert core.
* Requires @conns_lock
@@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
write_unlock_bh(&lgr->conns_lock);
if (!reduced || lgr->conns_num)
return;
- /* client link group creation always follows the server link group
- * creation. For client use a somewhat higher removal delay time,
- * otherwise there is a risk of out-of-sync link groups.
- */
- mod_delayed_work(system_wq, &lgr->free_work,
- lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
- SMC_LGR_FREE_DELAY_SERV);
+ smc_lgr_schedule_free_work(lgr);
}
static void smc_lgr_free_work(struct work_struct *work)
@@ -140,11 +145,12 @@ static void smc_lgr_free_work(struct work_struct *work)
list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
spin_unlock_bh(&smc_lgr_list.lock);
- smc_lgr_free(lgr);
+ if (!delayed_work_pending(&lgr->free_work))
+ smc_lgr_free(lgr);
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
char *peer_systemid, unsigned short vlan_id)
{
@@ -161,7 +167,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
}
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
lgr->sync_err = false;
- lgr->daddr = peer_in_addr;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lgr->vlan_id = vlan_id;
rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +182,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
+ lnk->state = SMC_LNK_ACTIVATING;
+ lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = smcibdev;
lnk->ibport = ibport;
lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -198,6 +205,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
goto destroy_qp;
init_completion(&lnk->llc_confirm);
init_completion(&lnk->llc_confirm_resp);
+ init_completion(&lnk->llc_add);
+ init_completion(&lnk->llc_add_resp);
smc->conn.lgr = lgr;
rwlock_init(&lgr->conns_lock);
@@ -306,6 +315,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
kfree(lgr);
}
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* terminate linkgroup abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
@@ -313,15 +331,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
struct smc_sock *smc;
struct rb_node *node;
- spin_lock_bh(&smc_lgr_list.lock);
- if (list_empty(&lgr->list)) {
- /* termination already triggered */
- spin_unlock_bh(&smc_lgr_list.lock);
- return;
- }
- /* do not use this link group for new connections */
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ smc_lgr_forget(lgr);
write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
@@ -339,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
}
write_unlock_bh(&lgr->conns_lock);
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+ smc_lgr_schedule_free_work(lgr);
}
/* Determine vlan of internal TCP socket.
@@ -400,7 +411,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact)
{
@@ -457,7 +468,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
create:
if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+ rc = smc_lgr_create(smc, smcibdev, ibport,
lcl->id_for_peer, vlan_id);
if (rc)
goto out;
@@ -465,7 +476,7 @@ create:
rc = smc_link_determine_gid(conn->lgr);
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
- conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg);
+ conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
#ifndef KERNEL_HAS_ATOMIC64
spin_lock_init(&conn->acurs_lock);
#endif
@@ -698,27 +709,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
- struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
- u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
- struct smc_link_group *lgr = conn->lgr;
- u32 rkey = ntohl(clc->rmb_rkey);
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
(lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
test_bit(i, lgr->rtokens_used_mask)) {
- conn->rtoken_idx = i;
+ /* already in list */
+ return i;
+ }
+ }
+ i = smc_rmb_reserve_rtoken_idx(lgr);
+ if (i < 0)
+ return i;
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+ u32 rkey = ntohl(nw_rkey);
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ test_bit(i, lgr->rtokens_used_mask)) {
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+ clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
}
- conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+ return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc)
+{
+ conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
return 0;
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fe691bf9af91..07e2a393e6d9 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,6 +32,12 @@ enum smc_lgr_role { /* possible roles of a link group */
SMC_SERV /* server */
};
+enum smc_link_state { /* possible states of a link */
+ SMC_LNK_INACTIVE, /* link is inactive */
+ SMC_LNK_ACTIVATING, /* link is being activated */
+ SMC_LNK_ACTIVE /* link is active */
+};
+
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
struct smc_wr_buf {
@@ -87,8 +93,14 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+
+ enum smc_link_state state; /* state of link */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+ int llc_confirm_rc; /* rc from confirm link msg */
+ int llc_confirm_resp_rc; /* rc from conf_resp msg */
+ struct completion llc_add; /* wait for rx of add link */
+ struct completion llc_add_resp; /* wait for rx of add link rsp*/
};
/* For now we just allow one parallel link per link group. The SMC protocol
@@ -124,7 +136,6 @@ struct smc_rtoken { /* address/key of remote RMB */
struct smc_link_group {
struct list_head list;
enum smc_lgr_role role; /* client or server */
- __be32 daddr; /* destination ip address */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
@@ -186,10 +197,13 @@ struct smc_sock;
struct smc_clc_msg_accept_confirm;
void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
int smc_buf_create(struct smc_sock *smc);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 2a8957bd6d38..26df554f7588 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -23,6 +23,8 @@
#include "smc_wr.h"
#include "smc.h"
+#define SMC_MAX_CQE 32766 /* max. # of completion queue elements */
+
#define SMC_QP_MIN_RNR_TIMER 5
#define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */
#define SMC_QP_RETRY_CNT 7 /* 7: infinite */
@@ -438,9 +440,15 @@ out:
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
{
struct ib_cq_init_attr cqattr = {
- .cqe = SMC_WR_MAX_CQE, .comp_vector = 0 };
+ .cqe = SMC_MAX_CQE, .comp_vector = 0 };
+ int cqe_size_order, smc_order;
long rc;
+ /* the calculated number of cq entries fits to mlx5 cq allocation */
+ cqe_size_order = cache_line_size() == 128 ? 7 : 6;
+ smc_order = MAX_ORDER - cqe_size_order - 1;
+ if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
+ cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
smc_wr_tx_cq_handler, NULL,
smcibdev, &cqattr);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 92fe4cc8c82c..ea4b21981b4b 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -4,9 +4,6 @@
*
* Link Layer Control (LLC)
*
- * For now, we only support the necessary "confirm link" functionality
- * which happens for the first RoCE link after successful CLC handshake.
- *
* Copyright IBM Corp. 2016
*
* Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com>
@@ -21,6 +18,122 @@
#include "smc_clc.h"
#include "smc_llc.h"
+#define SMC_LLC_DATA_LEN 40
+
+struct smc_llc_hdr {
+ struct smc_wr_rx_hdr common;
+ u8 length; /* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 add_link_rej_rsn:4,
+ reserved:4;
+#endif
+ u8 flags;
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
+
+struct smc_llc_msg_confirm_link { /* type 0x01 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 link_uid[SMC_LGR_ID_SIZE];
+ u8 max_links;
+ u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS 2
+
+struct smc_llc_msg_add_link { /* type 0x02 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 reserved2[2];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 flags2; /* QP mtu */
+ u8 initial_psn[3];
+ u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
+
+struct smc_llc_msg_del_link { /* type 0x04 */
+ struct smc_llc_hdr hd;
+ u8 link_num;
+ __be32 reason;
+ u8 reserved[35];
+} __packed; /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link { /* type 0x07 */
+ struct smc_llc_hdr hd;
+ u8 user_data[16];
+ u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+ union {
+ u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
+ /* is actually the num of rtokens, first */
+ /* rtoken is always for the current link */
+ u8 link_id; /* link id of the rtoken */
+ };
+ __be32 rmb_key;
+ __be64 rmb_vaddr;
+} __packed; /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG 3
+
+struct smc_llc_msg_confirm_rkey { /* type 0x06 */
+ struct smc_llc_hdr hd;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+ u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX 8
+#define SMC_LLC_FLAG_RKEY_NEG 0x20
+
+struct smc_llc_msg_delete_rkey { /* type 0x09 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ u8 err_mask;
+ u8 reserved[2];
+ __be32 rkey[8];
+ u8 reserved2[4];
+};
+
+union smc_llc_msg {
+ struct smc_llc_msg_confirm_link confirm_link;
+ struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_del_link delete_link;
+
+ struct smc_llc_msg_confirm_rkey confirm_rkey;
+ struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+ struct smc_llc_msg_delete_rkey delete_rkey;
+
+ struct smc_llc_msg_test_link test_link;
+ struct {
+ struct smc_llc_hdr hdr;
+ u8 data[SMC_LLC_DATA_LEN];
+ } raw;
+};
+
+#define SMC_LLC_FLAG_RESP 0x80
+
/********************************** send *************************************/
struct smc_llc_tx_pend {
@@ -87,14 +200,112 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(confllc->sender_mac, mac, ETH_ALEN);
memcpy(confllc->sender_gid, gid, SMC_GID_SIZE);
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
- /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
+ confllc->link_num = link->link_id;
memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+ confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+ union ib_gid *gid,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_add_link *addllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ memset(addllc, 0, sizeof(*addllc));
+ addllc->hd.common.type = SMC_LLC_ADD_LINK;
+ addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP) {
+ addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* always reject more links for now */
+ addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+ addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ }
+ memcpy(addllc->sender_mac, mac, ETH_ALEN);
+ memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_del_link *delllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ delllc = (struct smc_llc_msg_del_link *)wr_buf;
+ memset(delllc, 0, sizeof(*delllc));
+ delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP)
+ delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* DEL_LINK_ALL because only 1 link supported */
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ delllc->link_num = link->link_id;
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_test_link *testllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ testllc = (struct smc_llc_msg_test_link *)wr_buf;
+ memset(testllc, 0, sizeof(*testllc));
+ testllc->hd.common.type = SMC_LLC_TEST_LINK;
+ testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+ if (reqresp == SMC_LLC_RESP)
+ testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ memcpy(wr_buf, llcbuf, llclen);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
@@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
struct smc_llc_msg_confirm_link *llc)
{
struct smc_link_group *lgr;
+ int conf_rc;
lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ /* RMBE eyecatchers are not supported */
+ if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+ conf_rc = 0;
+ else
+ conf_rc = ENOTSUPP;
+
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV)
+ if (lgr->role == SMC_SERV &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_resp_rc = conf_rc;
complete(&link->llc_confirm_resp);
+ }
} else {
- if (lgr->role == SMC_CLNT) {
+ if (lgr->role == SMC_CLNT &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
}
}
+static void smc_llc_rx_add_link(struct smc_link *link,
+ struct smc_llc_msg_add_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (link->state == SMC_LNK_ACTIVATING)
+ complete(&link->llc_add_resp);
+ } else {
+ if (link->state == SMC_LNK_ACTIVATING) {
+ complete(&link->llc_add);
+ return;
+ }
+
+ if (lgr->role == SMC_SERV) {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+
+ } else {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ }
+ }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+ struct smc_llc_msg_del_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (lgr->role == SMC_SERV)
+ smc_lgr_terminate(lgr);
+ } else {
+ if (lgr->role == SMC_SERV) {
+ smc_lgr_forget(lgr);
+ smc_llc_send_delete_link(link, SMC_LLC_REQ);
+ } else {
+ smc_llc_send_delete_link(link, SMC_LLC_RESP);
+ smc_lgr_terminate(lgr);
+ }
+ }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+ struct smc_llc_msg_test_link *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+ }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ int rc;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ rc = smc_rtoken_add(lgr,
+ llc->rtoken[0].rmb_vaddr,
+ llc->rtoken[0].rmb_key);
+
+ /* ignore rtokens for other links, we have only one link */
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (rc < 0)
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ /* ignore rtokens for other links, we have only one link */
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+ struct smc_llc_msg_delete_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ u8 err_mask = 0;
+ int i, max;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(lgr, llc->rkey[i]))
+ err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+ }
+
+ if (err_mask) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = err_mask;
+ }
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
return; /* short message */
if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */
- if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+ switch (llc->raw.hdr.common.type) {
+ case SMC_LLC_TEST_LINK:
+ smc_llc_rx_test_link(link, &llc->test_link);
+ break;
+ case SMC_LLC_CONFIRM_LINK:
smc_llc_rx_confirm_link(link, &llc->confirm_link);
+ break;
+ case SMC_LLC_ADD_LINK:
+ smc_llc_rx_add_link(link, &llc->add_link);
+ break;
+ case SMC_LLC_DELETE_LINK:
+ smc_llc_rx_delete_link(link, &llc->delete_link);
+ break;
+ case SMC_LLC_CONFIRM_RKEY:
+ smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+ break;
+ case SMC_LLC_CONFIRM_RKEY_CONT:
+ smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+ break;
+ case SMC_LLC_DELETE_RKEY:
+ smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+ break;
+ }
}
/***************************** init, exit, misc ******************************/
@@ -140,6 +510,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
.type = SMC_LLC_CONFIRM_LINK
},
{
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_TEST_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY_CONT
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_RKEY
+ },
+ {
.handler = NULL,
}
};
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 51b27ce90dbd..e4a7d5e234d5 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -18,6 +18,7 @@
#define SMC_LLC_FLAG_RESP 0x80
#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
+#define SMC_LLC_WAIT_TIME (2 * HZ)
enum smc_llc_reqresp {
SMC_LLC_REQ,
@@ -26,39 +27,23 @@ enum smc_llc_reqresp {
enum smc_llc_msg_type {
SMC_LLC_CONFIRM_LINK = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN 40
-
-struct smc_llc_hdr {
- struct smc_wr_rx_hdr common;
- u8 length; /* 44 */
- u8 reserved;
- u8 flags;
-};
-
-struct smc_llc_msg_confirm_link { /* type 0x01 */
- struct smc_llc_hdr hd;
- u8 sender_mac[ETH_ALEN];
- u8 sender_gid[SMC_GID_SIZE];
- u8 sender_qp_num[3];
- u8 link_num;
- u8 link_uid[SMC_LGR_ID_SIZE];
- u8 max_links;
- u8 reserved[9];
-};
-
-union smc_llc_msg {
- struct smc_llc_msg_confirm_link confirm_link;
- struct {
- struct smc_llc_hdr hdr;
- u8 data[SMC_LLC_DATA_LEN];
- } raw;
+ SMC_LLC_ADD_LINK = 0x02,
+ SMC_LLC_DELETE_LINK = 0x04,
+ SMC_LLC_CONFIRM_RKEY = 0x06,
+ SMC_LLC_TEST_LINK = 0x07,
+ SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
+ SMC_LLC_DELETE_RKEY = 0x09,
};
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+ enum smc_llc_reqresp reqresp);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index ef0c3494c9cb..210bec3c3ebe 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -19,7 +19,6 @@
#include "smc.h"
#include "smc_core.h"
-#define SMC_WR_MAX_CQE 32768 /* max. # of completion queue elements */
#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */
#define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
diff --git a/net/socket.c b/net/socket.c
index a93c99b518ca..f10f1d947c78 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,7 +104,6 @@
#include <linux/ipv6_route.h>
#include <linux/route.h>
#include <linux/sockios.h>
-#include <linux/atalk.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>
@@ -234,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
return __put_user(klen, ulen);
}
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
@@ -991,10 +990,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* what to do with it - that's up to the protocol still.
*/
-static struct ns_common *get_net_ns(struct ns_common *ns)
+struct ns_common *get_net_ns(struct ns_common *ns)
{
return &get_net(container_of(ns, struct net, ns))->ns;
}
+EXPORT_SYMBOL_GPL(get_net_ns);
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
@@ -1332,7 +1332,7 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
}
EXPORT_SYMBOL(sock_create_kern);
-SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
+int __sys_socket(int family, int type, int protocol)
{
int retval;
struct socket *sock;
@@ -1359,12 +1359,16 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}
+SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
+{
+ return __sys_socket(family, type, protocol);
+}
+
/*
* Create a pair of connected sockets.
*/
-SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
- int __user *, usockvec)
+int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
{
struct socket *sock1, *sock2;
int fd1, fd2, err;
@@ -1449,6 +1453,12 @@ out:
return err;
}
+SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
+ int __user *, usockvec)
+{
+ return __sys_socketpair(family, type, protocol, usockvec);
+}
+
/*
* Bind a name to a socket. Nothing much to do here since it's
* the protocol's responsibility to handle the local address.
@@ -1457,7 +1467,7 @@ out:
* the protocol layer (having also checked the address is ok).
*/
-SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
+int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
{
struct socket *sock;
struct sockaddr_storage address;
@@ -1480,13 +1490,18 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
return err;
}
+SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
+{
+ return __sys_bind(fd, umyaddr, addrlen);
+}
+
/*
* Perform a listen. Basically, we allow the protocol to do anything
* necessary for a listen, and if that works, we mark the socket as
* ready for listening.
*/
-SYSCALL_DEFINE2(listen, int, fd, int, backlog)
+int __sys_listen(int fd, int backlog)
{
struct socket *sock;
int err, fput_needed;
@@ -1507,6 +1522,11 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
return err;
}
+SYSCALL_DEFINE2(listen, int, fd, int, backlog)
+{
+ return __sys_listen(fd, backlog);
+}
+
/*
* For accept, we attempt to create a new socket, set up the link
* with the client, wake up the client, then return the new
@@ -1516,11 +1536,11 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
*
* 1003.1g adds the ability to recvmsg() to query connection pending
* status to recvmsg. We need to add that support in a way thats
- * clean when we restucture accept also.
+ * clean when we restructure accept also.
*/
-SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
- int __user *, upeer_addrlen, int, flags)
+int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags)
{
struct socket *sock, *newsock;
struct file *newfile;
@@ -1573,8 +1593,9 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
goto out_fd;
if (upeer_sockaddr) {
- if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
- &len, 2) < 0) {
+ len = newsock->ops->getname(newsock,
+ (struct sockaddr *)&address, 2);
+ if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
}
@@ -1599,10 +1620,16 @@ out_fd:
goto out_put;
}
+SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
+ int __user *, upeer_addrlen, int, flags)
+{
+ return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
+}
+
SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
int __user *, upeer_addrlen)
{
- return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
+ return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}
/*
@@ -1617,8 +1644,7 @@ SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
* include the -EINPROGRESS status for such sockets.
*/
-SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
- int, addrlen)
+int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
{
struct socket *sock;
struct sockaddr_storage address;
@@ -1644,17 +1670,23 @@ out:
return err;
}
+SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
+ int, addrlen)
+{
+ return __sys_connect(fd, uservaddr, addrlen);
+}
+
/*
* Get the local address ('name') of a socket object. Move the obtained
* name to user space.
*/
-SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
- int __user *, usockaddr_len)
+int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len)
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
@@ -1664,10 +1696,11 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
if (err)
goto out_put;
- err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
- if (err)
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+ if (err < 0)
goto out_put;
- err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
out_put:
fput_light(sock->file, fput_needed);
@@ -1675,17 +1708,23 @@ out:
return err;
}
+SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
+ int __user *, usockaddr_len)
+{
+ return __sys_getsockname(fd, usockaddr, usockaddr_len);
+}
+
/*
* Get the remote address ('name') of a socket object. Move the obtained
* name to user space.
*/
-SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
- int __user *, usockaddr_len)
+int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len)
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
@@ -1695,26 +1734,29 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
return err;
}
- err =
- sock->ops->getname(sock, (struct sockaddr *)&address, &len,
- 1);
- if (!err)
- err = move_addr_to_user(&address, len, usockaddr,
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+ if (err >= 0)
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr,
usockaddr_len);
fput_light(sock->file, fput_needed);
}
return err;
}
+SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
+ int __user *, usockaddr_len)
+{
+ return __sys_getpeername(fd, usockaddr, usockaddr_len);
+}
+
/*
* Send a datagram to a given address. We move the address into kernel
* space and check the user space data area is readable before invoking
* the protocol.
*/
-
-SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
- unsigned int, flags, struct sockaddr __user *, addr,
- int, addr_len)
+int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
+ struct sockaddr __user *addr, int addr_len)
{
struct socket *sock;
struct sockaddr_storage address;
@@ -1752,6 +1794,13 @@ out:
return err;
}
+SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
+ unsigned int, flags, struct sockaddr __user *, addr,
+ int, addr_len)
+{
+ return __sys_sendto(fd, buff, len, flags, addr, addr_len);
+}
+
/*
* Send a datagram down a socket.
*/
@@ -1759,7 +1808,7 @@ out:
SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
unsigned int, flags)
{
- return sys_sendto(fd, buff, len, flags, NULL, 0);
+ return __sys_sendto(fd, buff, len, flags, NULL, 0);
}
/*
@@ -1767,10 +1816,8 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
* sender. We verify the buffers are writable and if needed move the
* sender address from kernel to user space.
*/
-
-SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
- unsigned int, flags, struct sockaddr __user *, addr,
- int __user *, addr_len)
+int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
+ struct sockaddr __user *addr, int __user *addr_len)
{
struct socket *sock;
struct iovec iov;
@@ -1810,6 +1857,13 @@ out:
return err;
}
+SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
+ unsigned int, flags, struct sockaddr __user *, addr,
+ int __user *, addr_len)
+{
+ return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
+}
+
/*
* Receive a datagram from a socket.
*/
@@ -1817,7 +1871,7 @@ out:
SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
unsigned int, flags)
{
- return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
+ return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
}
/*
@@ -1825,8 +1879,8 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
* to pass the user mode parameter for the protocols to sort out.
*/
-SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
- char __user *, optval, int, optlen)
+static int __sys_setsockopt(int fd, int level, int optname,
+ char __user *optval, int optlen)
{
int err, fput_needed;
struct socket *sock;
@@ -1854,13 +1908,19 @@ out_put:
return err;
}
+SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
+ char __user *, optval, int, optlen)
+{
+ return __sys_setsockopt(fd, level, optname, optval, optlen);
+}
+
/*
* Get a socket option. Because we don't know the option lengths we have
* to pass a user mode parameter for the protocols to sort out.
*/
-SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
- char __user *, optval, int __user *, optlen)
+static int __sys_getsockopt(int fd, int level, int optname,
+ char __user *optval, int __user *optlen)
{
int err, fput_needed;
struct socket *sock;
@@ -1885,11 +1945,17 @@ out_put:
return err;
}
+SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
+ char __user *, optval, int __user *, optlen)
+{
+ return __sys_getsockopt(fd, level, optname, optval, optlen);
+}
+
/*
* Shutdown a socket.
*/
-SYSCALL_DEFINE2(shutdown, int, fd, int, how)
+int __sys_shutdown(int fd, int how)
{
int err, fput_needed;
struct socket *sock;
@@ -1904,6 +1970,11 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
return err;
}
+SYSCALL_DEFINE2(shutdown, int, fd, int, how)
+{
+ return __sys_shutdown(fd, how);
+}
+
/* A couple of helpful macros for getting the address of the 32/64 bit
* fields which are the same type (int / unsigned) on our platforms.
*/
@@ -2067,12 +2138,16 @@ out_freeiov:
* BSD sendmsg interface
*/
-long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
+long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
+ bool forbid_cmsg_compat)
{
int fput_needed, err;
struct msghdr msg_sys;
struct socket *sock;
+ if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
+ return -EINVAL;
+
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
@@ -2086,9 +2161,7 @@ out:
SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
{
- if (flags & MSG_CMSG_COMPAT)
- return -EINVAL;
- return __sys_sendmsg(fd, msg, flags);
+ return __sys_sendmsg(fd, msg, flags, true);
}
/*
@@ -2096,7 +2169,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int
*/
int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
- unsigned int flags)
+ unsigned int flags, bool forbid_cmsg_compat)
{
int fput_needed, err, datagrams;
struct socket *sock;
@@ -2106,6 +2179,9 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
struct used_address used_address;
unsigned int oflags = flags;
+ if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
+ return -EINVAL;
+
if (vlen > UIO_MAXIOV)
vlen = UIO_MAXIOV;
@@ -2162,9 +2238,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
unsigned int, vlen, unsigned int, flags)
{
- if (flags & MSG_CMSG_COMPAT)
- return -EINVAL;
- return __sys_sendmmsg(fd, mmsg, vlen, flags);
+ return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
}
static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
@@ -2237,12 +2311,16 @@ out_freeiov:
* BSD recvmsg interface
*/
-long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
+long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
+ bool forbid_cmsg_compat)
{
int fput_needed, err;
struct msghdr msg_sys;
struct socket *sock;
+ if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
+ return -EINVAL;
+
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
@@ -2257,9 +2335,7 @@ out:
SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
unsigned int, flags)
{
- if (flags & MSG_CMSG_COMPAT)
- return -EINVAL;
- return __sys_recvmsg(fd, msg, flags);
+ return __sys_recvmsg(fd, msg, flags, true);
}
/*
@@ -2288,10 +2364,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (!sock)
return err;
- err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
+ if (likely(!(flags & MSG_ERRQUEUE))) {
+ err = sock_error(sock->sk);
+ if (err) {
+ datagrams = err;
+ goto out_put;
+ }
}
entry = mmsg;
@@ -2375,9 +2453,9 @@ out_put:
return datagrams;
}
-SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
- unsigned int, vlen, unsigned int, flags,
- struct timespec __user *, timeout)
+static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ struct timespec __user *timeout)
{
int datagrams;
struct timespec timeout_sys;
@@ -2400,6 +2478,13 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
return datagrams;
}
+SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
+ unsigned int, vlen, unsigned int, flags,
+ struct timespec __user *, timeout)
+{
+ return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
+}
+
#ifdef __ARCH_WANT_SYS_SOCKETCALL
/* Argument list sizes for sys_socketcall */
#define AL(x) ((x) * sizeof(unsigned long))
@@ -2447,76 +2532,82 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
switch (call) {
case SYS_SOCKET:
- err = sys_socket(a0, a1, a[2]);
+ err = __sys_socket(a0, a1, a[2]);
break;
case SYS_BIND:
- err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
+ err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
break;
case SYS_CONNECT:
- err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
+ err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
break;
case SYS_LISTEN:
- err = sys_listen(a0, a1);
+ err = __sys_listen(a0, a1);
break;
case SYS_ACCEPT:
- err = sys_accept4(a0, (struct sockaddr __user *)a1,
- (int __user *)a[2], 0);
+ err = __sys_accept4(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2], 0);
break;
case SYS_GETSOCKNAME:
err =
- sys_getsockname(a0, (struct sockaddr __user *)a1,
- (int __user *)a[2]);
+ __sys_getsockname(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2]);
break;
case SYS_GETPEERNAME:
err =
- sys_getpeername(a0, (struct sockaddr __user *)a1,
- (int __user *)a[2]);
+ __sys_getpeername(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2]);
break;
case SYS_SOCKETPAIR:
- err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
+ err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
break;
case SYS_SEND:
- err = sys_send(a0, (void __user *)a1, a[2], a[3]);
+ err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
+ NULL, 0);
break;
case SYS_SENDTO:
- err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
- (struct sockaddr __user *)a[4], a[5]);
+ err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
+ (struct sockaddr __user *)a[4], a[5]);
break;
case SYS_RECV:
- err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
+ err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+ NULL, NULL);
break;
case SYS_RECVFROM:
- err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
- (struct sockaddr __user *)a[4],
- (int __user *)a[5]);
+ err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+ (struct sockaddr __user *)a[4],
+ (int __user *)a[5]);
break;
case SYS_SHUTDOWN:
- err = sys_shutdown(a0, a1);
+ err = __sys_shutdown(a0, a1);
break;
case SYS_SETSOCKOPT:
- err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
+ err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
+ a[4]);
break;
case SYS_GETSOCKOPT:
err =
- sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
- (int __user *)a[4]);
+ __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
+ (int __user *)a[4]);
break;
case SYS_SENDMSG:
- err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
+ err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
+ a[2], true);
break;
case SYS_SENDMMSG:
- err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
+ err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
+ a[3], true);
break;
case SYS_RECVMSG:
- err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
+ err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
+ a[2], true);
break;
case SYS_RECVMMSG:
- err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
- (struct timespec __user *)a[4]);
+ err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
+ a[3], (struct timespec __user *)a[4]);
break;
case SYS_ACCEPT4:
- err = sys_accept4(a0, (struct sockaddr __user *)a1,
- (int __user *)a[2], a[3]);
+ err = __sys_accept4(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2], a[3]);
break;
default:
err = -EINVAL;
@@ -2587,6 +2678,11 @@ void sock_unregister(int family)
}
EXPORT_SYMBOL(sock_unregister);
+bool sock_is_registered(int family)
+{
+ return family < NPROTO && rcu_access_pointer(net_families[family]);
+}
+
static int __init sock_init(void)
{
int err;
@@ -3166,17 +3262,15 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
}
EXPORT_SYMBOL(kernel_connect);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 0);
+ return sock->ops->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 1);
+ return sock->ops->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 1fdab5c4eda8..b9283ce5cd85 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -60,7 +60,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
struct sock *sk = strp->sk;
/* Report an error on the lower socket */
- sk->sk_err = err;
+ sk->sk_err = -err;
sk->sk_error_report(sk);
}
}
@@ -458,7 +458,7 @@ static void strp_msg_timeout(struct work_struct *w)
/* Message assembly timed out */
STRP_STATS_INCR(strp->stats.msg_timeouts);
strp->cb.lock(strp);
- strp->cb.abort_parser(strp, ETIMEDOUT);
+ strp->cb.abort_parser(strp, -ETIMEDOUT);
strp->cb.unlock(strp);
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 12649c9fedab..8654494b4d0a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
- err = crypto_ahash_init(req);
- if (err)
- goto out;
err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 1d74d653e6c0..94a2b3f082a8 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -177,6 +177,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
u64 seq_send;
u8 *cksumkey;
unsigned int cksum_usage;
+ __be64 seq_send_be64;
dprintk("RPC: %s\n", __func__);
@@ -187,7 +188,9 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send64++;
spin_unlock(&krb5_seq_lock);
- *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+
+ seq_send_be64 = cpu_to_be64(seq_send);
+ memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
if (ctx->initiate) {
cksumkey = ctx->initiator_sign;
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index dcf9515d9aef..b601a73cc9db 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -155,10 +155,12 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
u8 flags;
int i;
unsigned int cksum_usage;
+ __be16 be16_ptr;
dprintk("RPC: %s\n", __func__);
- if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
+ memcpy(&be16_ptr, (char *) ptr, 2);
+ if (be16_to_cpu(be16_ptr) != KG2_TOK_MIC)
return GSS_S_DEFECTIVE_TOKEN;
flags = ptr[2];
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 26531193fce4..5089dbb96d58 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1375,7 +1375,7 @@ static int create_use_gss_proxy_proc_entry(struct net *net)
struct proc_dir_entry **p = &sn->use_gssp_proc;
sn->use_gss_proxy = -1;
- *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR,
+ *p = proc_create_data("use-gss-proxy", S_IFREG | 0600,
sn->proc_net_rpc,
&use_gss_proxy_ops, net);
if (!*p)
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8a7e1c774f9c..cdda4744c9b1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1450,8 +1450,8 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
struct cache_detail *cd)
{
char tbuf[20];
- char *bp, *ep;
- time_t then, now;
+ char *ep;
+ time_t now;
if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL;
@@ -1461,24 +1461,24 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
simple_strtoul(tbuf, &ep, 0);
if (*ep && *ep != '\n')
return -EINVAL;
+ /* Note that while we check that 'buf' holds a valid number,
+ * we always ignore the value and just flush everything.
+ * Making use of the number leads to races.
+ */
- bp = tbuf;
- then = get_expiry(&bp);
now = seconds_since_boot();
- cd->nextcheck = now;
- /* Can only set flush_time to 1 second beyond "now", or
- * possibly 1 second beyond flushtime. This is because
- * flush_time never goes backwards so it mustn't get too far
- * ahead of time.
+ /* Always flush everything, so behave like cache_purge()
+ * Do this by advancing flush_time to the current time,
+ * or by one second if it has already reached the current time.
+ * Newly added cache entries will always have ->last_refresh greater
+ * that ->flush_time, so they don't get flushed prematurely.
*/
- if (then >= now) {
- /* Want to flush everything, so behave like cache_purge() */
- if (cd->flush_time >= now)
- now = cd->flush_time + 1;
- then = now;
- }
- cd->flush_time = then;
+ if (cd->flush_time >= now)
+ now = cd->flush_time + 1;
+
+ cd->flush_time = now;
+ cd->nextcheck = now;
cache_flush();
*ppos += count;
@@ -1621,20 +1621,20 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
if (cd->procfs == NULL)
goto out_nomem;
- p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
+ p = proc_create_data("flush", S_IFREG | 0600,
cd->procfs, &cache_flush_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
if (cd->cache_request || cd->cache_parse) {
- p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
- cd->procfs, &cache_file_operations_procfs, cd);
+ p = proc_create_data("channel", S_IFREG | 0600, cd->procfs,
+ &cache_file_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
}
if (cd->cache_show) {
- p = proc_create_data("content", S_IFREG|S_IRUSR,
- cd->procfs, &content_file_operations_procfs, cd);
+ p = proc_create_data("content", S_IFREG | 0400, cd->procfs,
+ &content_file_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 166f8c1680d1..c2266f387213 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = {
* negative errno is returned.
*/
static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
- struct sockaddr *buf, int buflen)
+ struct sockaddr *buf)
{
struct socket *sock;
int err;
@@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
goto out_release;
}
- err = kernel_getsockname(sock, buf, &buflen);
+ err = kernel_getsockname(sock, buf);
if (err < 0) {
dprintk("RPC: getsockname failed (%d)\n", err);
goto out_release;
@@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
rcu_read_unlock();
rpc_set_port(sap, 0);
- err = rpc_sockname(net, sap, salen, buf, buflen);
+ err = rpc_sockname(net, sap, salen, buf);
put_net(net);
if (err != 0)
/* Couldn't discover local address, return ANYADDR */
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e980d2a493de..45a033329cd4 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -139,7 +139,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
return;
/* make tasks file */
- if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs,
+ if (!debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs,
clnt, &tasks_fops))
goto out_err;
@@ -241,7 +241,7 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
return;
/* make tasks file */
- if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs,
+ if (!debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs,
xprt, &xprt_info_fops)) {
debugfs_remove_recursive(xprt->debugfs);
xprt->debugfs = NULL;
@@ -317,7 +317,7 @@ inject_fault_dir(struct dentry *topdir)
if (!faultdir)
return NULL;
- if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
+ if (!debugfs_create_file("disconnect", S_IFREG | 0400, faultdir,
NULL, &fault_disconnect_fops))
return NULL;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index fc97fc3ed637..0f08934b2cea 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -820,13 +820,13 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
{
struct dentry *dentry;
struct inode *dir = d_inode(parent);
- umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR;
+ umode_t umode = S_IFIFO | 0600;
int err;
if (pipe->ops->upcall == NULL)
- umode &= ~S_IRUGO;
+ umode &= ~0444;
if (pipe->ops->downcall == NULL)
- umode &= ~S_IWUGO;
+ umode &= ~0222;
inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
@@ -1035,7 +1035,7 @@ static const struct rpc_filelist authfiles[] = {
[RPCAUTH_info] = {
.name = "info",
.i_fop = &rpc_info_operations,
- .mode = S_IFREG | S_IRUSR,
+ .mode = S_IFREG | 0400,
},
};
@@ -1068,8 +1068,8 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
{
struct dentry *ret;
- ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
- rpc_clntdir_populate, rpc_client);
+ ret = rpc_mkdir_populate(dentry, name, 0555, NULL,
+ rpc_clntdir_populate, rpc_client);
if (!IS_ERR(ret)) {
rpc_client->cl_pipedir_objects.pdh_dentry = ret;
rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
@@ -1096,17 +1096,17 @@ static const struct rpc_filelist cache_pipefs_files[3] = {
[0] = {
.name = "channel",
.i_fop = &cache_file_operations_pipefs,
- .mode = S_IFREG|S_IRUSR|S_IWUSR,
+ .mode = S_IFREG | 0600,
},
[1] = {
.name = "content",
.i_fop = &content_file_operations_pipefs,
- .mode = S_IFREG|S_IRUSR,
+ .mode = S_IFREG | 0400,
},
[2] = {
.name = "flush",
.i_fop = &cache_flush_operations_pipefs,
- .mode = S_IFREG|S_IRUSR|S_IWUSR,
+ .mode = S_IFREG | 0600,
},
};
@@ -1164,39 +1164,39 @@ enum {
static const struct rpc_filelist files[] = {
[RPCAUTH_lockd] = {
.name = "lockd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_mount] = {
.name = "mount",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfs] = {
.name = "nfs",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_portmap] = {
.name = "portmap",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_statd] = {
.name = "statd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfsd4_cb] = {
.name = "nfsd4_cb",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_cache] = {
.name = "cache",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfsd] = {
.name = "nfsd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_gssd] = {
.name = "gssd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
};
@@ -1261,7 +1261,7 @@ EXPORT_SYMBOL_GPL(rpc_put_sb_net);
static const struct rpc_filelist gssd_dummy_clnt_dir[] = {
[0] = {
.name = "clntXX",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
};
@@ -1310,7 +1310,7 @@ static const struct rpc_filelist gssd_dummy_info_file[] = {
[0] = {
.name = "info",
.i_fop = &rpc_dummy_info_operations,
- .mode = S_IFREG | S_IRUSR,
+ .mode = S_IFREG | 0400,
},
};
@@ -1397,7 +1397,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
sb->s_d_op = &simple_dentry_operations;
sb->s_time_gran = 1;
- inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
+ inode = rpc_get_inode(sb, S_IFDIR | 0555);
sb->s_root = root = d_make_root(inode);
if (!root)
return -ENOMEM;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 387cc4add6f6..30a4226baf03 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1255,6 +1255,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
/* Syntactic check complete */
serv->sv_stats->rpccnt++;
+ trace_svc_process(rqstp, progp->pg_name);
/* Build the reply header. */
statp = resv->iov_base +resv->iov_len;
@@ -1431,14 +1432,10 @@ svc_process(struct svc_rqst *rqstp)
}
/* Returns 1 for send, 0 for drop */
- if (likely(svc_process_common(rqstp, argv, resv))) {
- int ret = svc_send(rqstp);
+ if (likely(svc_process_common(rqstp, argv, resv)))
+ return svc_send(rqstp);
- trace_svc_process(rqstp, ret);
- return ret;
- }
out_drop:
- trace_svc_process(rqstp, 0);
svc_drop(rqstp);
return 0;
}
@@ -1536,3 +1533,112 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
return max;
}
EXPORT_SYMBOL_GPL(svc_max_payload);
+
+/**
+ * svc_fill_write_vector - Construct data argument for VFS write call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of write payload
+ * @total: total number of bytes of write payload
+ *
+ * Returns the number of elements populated in the data argument array.
+ */
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
+ size_t total)
+{
+ struct kvec *vec = rqstp->rq_vec;
+ struct page **pages;
+ unsigned int i;
+
+ /* Some types of transport can present the write payload
+ * entirely in rq_arg.pages. In this case, @first is empty.
+ */
+ i = 0;
+ if (first->iov_len) {
+ vec[i].iov_base = first->iov_base;
+ vec[i].iov_len = min_t(size_t, total, first->iov_len);
+ total -= vec[i].iov_len;
+ ++i;
+ }
+
+ WARN_ON_ONCE(rqstp->rq_arg.page_base != 0);
+ pages = rqstp->rq_arg.pages;
+ while (total) {
+ vec[i].iov_base = page_address(*pages);
+ vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
+ total -= vec[i].iov_len;
+ ++i;
+
+ ++pages;
+ }
+
+ WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec));
+ return i;
+}
+EXPORT_SYMBOL_GPL(svc_fill_write_vector);
+
+/**
+ * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of pathname
+ * @total: total length of the pathname argument
+ *
+ * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
+ * released automatically when @rqstp is recycled.
+ */
+char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
+ size_t total)
+{
+ struct xdr_buf *arg = &rqstp->rq_arg;
+ struct page **pages;
+ char *result;
+
+ /* VFS API demands a NUL-terminated pathname. This function
+ * uses a page from @rqstp as the pathname buffer, to enable
+ * direct placement. Thus the total buffer size is PAGE_SIZE.
+ * Space in this buffer for NUL-termination requires that we
+ * cap the size of the returned symlink pathname just a
+ * little early.
+ */
+ if (total > PAGE_SIZE - 1)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ /* Some types of transport can present the pathname entirely
+ * in rq_arg.pages. If not, then copy the pathname into one
+ * page.
+ */
+ pages = arg->pages;
+ WARN_ON_ONCE(arg->page_base != 0);
+ if (first->iov_base == 0) {
+ result = page_address(*pages);
+ result[total] = '\0';
+ } else {
+ size_t len, remaining;
+ char *dst;
+
+ result = page_address(*(rqstp->rq_next_page++));
+ dst = result;
+ remaining = total;
+
+ len = min_t(size_t, total, first->iov_len);
+ memcpy(dst, first->iov_base, len);
+ dst += len;
+ remaining -= len;
+
+ /* No more than one page left */
+ if (remaining) {
+ len = min_t(size_t, remaining, PAGE_SIZE);
+ memcpy(dst, page_address(*pages), len);
+ dst += len;
+ }
+
+ *dst = '\0';
+ }
+
+ /* Sanity check: we don't allow the pathname argument to
+ * contain a NUL byte.
+ */
+ if (strlen(result) != total)
+ return ERR_PTR(-EINVAL);
+ return result;
+}
+EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index f9307bd6644b..5185efb9027b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -173,6 +173,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
set_bit(XPT_BUSY, &xprt->xpt_flags);
rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
xprt->xpt_net = get_net(net);
+ strcpy(xprt->xpt_remotebuf, "uninitialized");
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
@@ -382,25 +383,21 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
int cpu;
if (!svc_xprt_has_something_to_do(xprt))
- goto out;
+ return;
/* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue
* the transport twice.
*/
- if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
- /* Don't enqueue transport while already enqueued */
- dprintk("svc: transport %p busy, not enqueued\n", xprt);
- goto out;
- }
+ if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
+ return;
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
atomic_long_inc(&pool->sp_stats.packets);
- dprintk("svc: transport %p put into queue\n", xprt);
spin_lock_bh(&pool->sp_lock);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
@@ -412,6 +409,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
atomic_long_inc(&pool->sp_stats.threads_woken);
+ rqstp->rq_qtime = ktime_get();
wake_up_process(rqstp->rq_task);
goto out_unlock;
}
@@ -420,7 +418,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
out_unlock:
rcu_read_unlock();
put_cpu();
-out:
trace_svc_xprt_do_enqueue(xprt, rqstp);
}
EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
@@ -454,13 +451,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
struct svc_xprt, xpt_ready);
list_del_init(&xprt->xpt_ready);
svc_xprt_get(xprt);
-
- dprintk("svc: transport %p dequeued, inuse=%d\n",
- xprt, kref_read(&xprt->xpt_ref));
}
spin_unlock_bh(&pool->sp_lock);
out:
- trace_svc_xprt_dequeue(xprt);
return xprt;
}
@@ -492,7 +485,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
- rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+ xprt->xpt_ops->xpo_release_rqst(rqstp);
kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
@@ -538,7 +531,6 @@ void svc_wake_up(struct svc_serv *serv)
if (test_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
rcu_read_unlock();
- dprintk("svc: daemon %p woken up.\n", rqstp);
wake_up_process(rqstp->rq_task);
trace_svc_wake_up(rqstp->rq_task->pid);
return;
@@ -734,6 +726,7 @@ out_found:
rqstp->rq_chandle.thread_wait = 5*HZ;
else
rqstp->rq_chandle.thread_wait = 1*HZ;
+ trace_svc_xprt_dequeue(rqstp);
return rqstp->rq_xprt;
}
@@ -789,7 +782,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
len = svc_deferred_recv(rqstp);
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
- dprintk("svc: got len=%d\n", len);
+ rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
}
@@ -844,10 +837,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
- if (xprt->xpt_ops->xpo_secure_port(rqstp))
- set_bit(RQ_SECURE, &rqstp->rq_flags);
- else
- clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer;
rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]);
@@ -859,7 +849,6 @@ out_release:
rqstp->rq_res.len = 0;
svc_xprt_release(rqstp);
out:
- trace_svc_recv(rqstp, err);
return err;
}
EXPORT_SYMBOL_GPL(svc_recv);
@@ -889,7 +878,7 @@ int svc_send(struct svc_rqst *rqstp)
goto out;
/* release the receive skb before sending the reply */
- rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+ xprt->xpt_ops->xpo_release_rqst(rqstp);
/* calculate over-all length */
xb = &rqstp->rq_res;
@@ -899,6 +888,7 @@ int svc_send(struct svc_rqst *rqstp)
/* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex);
+ trace_svc_stats_latency(rqstp);
if (test_bit(XPT_DEAD, &xprt->xpt_flags)
|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
len = -ENOTCONN;
@@ -906,12 +896,12 @@ int svc_send(struct svc_rqst *rqstp)
len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex);
rpc_wake_up(&xprt->xpt_bc_pending);
+ trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
len = 0;
out:
- trace_svc_send(rqstp, len);
return len;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 943f2a745cd5..5445145e639c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -391,9 +391,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk);
}
-static int svc_sock_secure_port(struct svc_rqst *rqstp)
+static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
- return svc_port_is_privileged(svc_addr(rqstp));
+ if (svc_port_is_privileged(svc_addr(rqstp)))
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
+ else
+ clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
@@ -832,12 +835,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- err = kernel_getpeername(newsock, sin, &slen);
+ err = kernel_getpeername(newsock, sin);
if (err < 0) {
net_warn_ratelimited("%s: peername failed (err %d)!\n",
serv->sv_name, -err);
goto failed; /* aborted connection or whatever */
}
+ slen = err;
/* Ideally, we would want to reject connections from unauthorized
* hosts here, but when we get encryption, the IP of the host won't
@@ -866,7 +870,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
if (IS_ERR(newsvsk))
goto failed;
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
- err = kernel_getsockname(newsock, sin, &slen);
+ err = kernel_getsockname(newsock, sin);
+ slen = err;
if (unlikely(err < 0)) {
dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
slen = offsetof(struct sockaddr, sa_data);
@@ -1307,6 +1312,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
+ strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
@@ -1465,7 +1471,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
err = PTR_ERR(svsk);
goto out;
}
- if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+ salen = kernel_getsockname(svsk->sk_sock, sin);
+ if (salen >= 0)
svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
return svc_one_sock_name(svsk, name_return, len);
@@ -1539,10 +1546,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
if (error < 0)
goto bummer;
- newlen = len;
- error = kernel_getsockname(sock, newsin, &newlen);
+ error = kernel_getsockname(sock, newsin);
if (error < 0)
goto bummer;
+ newlen = error;
if (protocol == IPPROTO_TCP) {
if ((error = kernel_listen(sock, 64)) < 0)
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index a4a8f6989ee7..dd8a431dc2ae 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -51,9 +51,9 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* RPC/RDMA parameters */
-unsigned int svcrdma_ord = RPCRDMA_ORD;
+unsigned int svcrdma_ord = 16; /* historical default */
static unsigned int min_ord = 1;
-static unsigned int max_ord = 4096;
+static unsigned int max_ord = 255;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 19e9c6b33042..3d45015dca97 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -110,15 +110,16 @@
* the RDMA_RECV completion. The SGL should contain full pages up until the
* last one.
*/
-static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *ctxt,
- u32 byte_count)
+static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *ctxt)
{
struct page *page;
- u32 bc;
int sge_no;
+ u32 len;
- /* Swap the page in the SGE with the page in argpages */
+ /* The reply path assumes the Call's transport header resides
+ * in rqstp->rq_pages[0].
+ */
page = ctxt->pages[0];
put_page(rqstp->rq_pages[0]);
rqstp->rq_pages[0] = page;
@@ -126,35 +127,35 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len =
- min_t(size_t, byte_count, ctxt->sge[0].length);
- rqstp->rq_arg.len = byte_count;
- rqstp->rq_arg.buflen = byte_count;
+ min_t(size_t, ctxt->byte_len, ctxt->sge[0].length);
+ rqstp->rq_arg.len = ctxt->byte_len;
+ rqstp->rq_arg.buflen = ctxt->byte_len;
/* Compute bytes past head in the SGL */
- bc = byte_count - rqstp->rq_arg.head[0].iov_len;
+ len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len;
/* If data remains, store it in the pagelist */
- rqstp->rq_arg.page_len = bc;
+ rqstp->rq_arg.page_len = len;
rqstp->rq_arg.page_base = 0;
sge_no = 1;
- while (bc && sge_no < ctxt->count) {
+ while (len && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
- bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
+ len -= min_t(u32, len, ctxt->sge[sge_no].length);
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* If not all pages were used from the SGL, free the remaining ones */
- bc = sge_no;
+ len = sge_no;
while (sge_no < ctxt->count) {
page = ctxt->pages[sge_no++];
put_page(page);
}
- ctxt->count = bc;
+ ctxt->count = len;
/* Set up tail */
rqstp->rq_arg.tail[0].iov_base = NULL;
@@ -534,10 +535,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
ctxt, rdma_xprt, rqstp);
atomic_inc(&rdma_stat_recv);
- /* Build up the XDR from the receive buffers. */
- rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
+ svc_rdma_build_arg_xdr(rqstp, ctxt);
- /* Decode the RDMA header. */
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
if (ret < 0)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9ad12a215b51..96cc8f6597d3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -69,7 +69,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
-static int svc_rdma_secure_port(struct svc_rqst *);
+static void svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
static const struct svc_xprt_ops svc_rdma_ops = {
@@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_warn("svcrdma: receive: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
+ pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
+ ib_wc_status_msg(wc->status),
+ wc->status, wc->vendor_err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1);
@@ -401,8 +401,10 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
*/
set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
- if (listener)
+ if (listener) {
+ strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
+ }
return cma_xprt;
}
@@ -762,13 +764,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!svc_rdma_prealloc_ctxts(newxprt))
goto errout;
- /*
- * Limit ORD based on client limit, local device limit, and
- * configured svcrdma limit.
- */
- newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
- newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
-
newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
@@ -843,15 +838,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
memset(&conn_param, 0, sizeof conn_param);
conn_param.responder_resources = 0;
- conn_param.initiator_depth = newxprt->sc_ord;
+ conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
+ dev->attrs.max_qp_init_rd_atom);
+ if (!conn_param.initiator_depth) {
+ dprintk("svcrdma: invalid ORD setting\n");
+ ret = -EINVAL;
+ goto errout;
+ }
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
- if (ret) {
- dprintk("svcrdma: failed to accept new connection, ret=%d\n",
- ret);
+ if (ret)
goto errout;
- }
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
@@ -862,7 +860,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
- dprintk(" ord : %d\n", newxprt->sc_ord);
+ dprintk(" ord : %d\n", conn_param.initiator_depth);
return &newxprt->sc_xprt;
@@ -992,9 +990,9 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
-static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
- return 1;
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
}
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e3c6a3df278b..c8902f11efdd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1798,10 +1798,9 @@ static void xs_sock_set_reuseport(struct socket *sock)
static unsigned short xs_sock_getport(struct socket *sock)
{
struct sockaddr_storage buf;
- int buflen;
unsigned short port = 0;
- if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+ if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
goto out;
switch (buf.ss_family) {
case AF_INET6:
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index c25a3a149dc4..e450212121d2 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -34,3 +34,11 @@ config TIPC_MEDIA_UDP
Saying Y here will enable support for running TIPC over IP/UDP
bool
default y
+
+config TIPC_DIAG
+ tristate "TIPC: socket monitoring interface"
+ depends on TIPC
+ default y
+ ---help---
+ Support for TIPC socket monitoring interface used by ss tool.
+ If unsure, say Y.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 37bb0bfbd936..aca168f2abb1 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,8 +9,13 @@ tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- server.o socket.o group.o
+ topsrv.o socket.o group.o
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
+
+
+obj-$(CONFIG_TIPC_DIAG) += diag.o
+
+tipc_diag-y := diag.o
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 48fd3b5a73fb..b88d48d00913 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -1,7 +1,7 @@
/*
* net/tipc/addr.c: TIPC address utility routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -34,113 +34,90 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <linux/kernel.h>
#include "addr.h"
#include "core.h"
-/**
- * in_own_cluster - test for cluster inclusion; <0.0.0> always matches
- */
-int in_own_cluster(struct net *net, u32 addr)
-{
- return in_own_cluster_exact(net, addr) || !addr;
-}
-
-int in_own_cluster_exact(struct net *net, u32 addr)
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- return !((addr ^ tn->own_addr) >> 12);
+ if (!domain || (domain == addr))
+ return true;
+ if (!legacy_format)
+ return false;
+ if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
+ return true;
+ if (domain == (addr & TIPC_ZONE_CLUSTER_MASK)) /* domain <Z.C.0> */
+ return true;
+ if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */
+ return true;
+ return false;
}
-/**
- * in_own_node - test for node inclusion; <0.0.0> always matches
- */
-int in_own_node(struct net *net, u32 addr)
+void tipc_set_node_id(struct net *net, u8 *id)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
+ u32 *tmp = (u32 *)id;
- return (addr == tn->own_addr) || !addr;
+ memcpy(tn->node_id, id, NODE_ID_LEN);
+ tipc_nodeid2string(tn->node_id_string, id);
+ tn->trial_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3];
+ pr_info("Own node identity %s, cluster identity %u\n",
+ tipc_own_id_string(net), tn->net_id);
}
-/**
- * addr_domain - convert 2-bit scope value to equivalent message lookup domain
- *
- * Needed when address of a named message must be looked up a second time
- * after a network hop.
- */
-u32 addr_domain(struct net *net, u32 sc)
+void tipc_set_node_addr(struct net *net, u32 addr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
+ u8 node_id[NODE_ID_LEN] = {0,};
- if (likely(sc == TIPC_NODE_SCOPE))
- return tn->own_addr;
- if (sc == TIPC_CLUSTER_SCOPE)
- return tipc_cluster_mask(tn->own_addr);
- return tipc_zone_mask(tn->own_addr);
+ tn->node_addr = addr;
+ if (!tipc_own_id(net)) {
+ sprintf(node_id, "%x", addr);
+ tipc_set_node_id(net, node_id);
+ }
+ tn->trial_addr = addr;
+ pr_info("32-bit node address hash set to %x\n", addr);
}
-/**
- * tipc_addr_domain_valid - validates a network domain address
- *
- * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
- * where Z, C, and N are non-zero.
- *
- * Returns 1 if domain address is valid, otherwise 0
- */
-int tipc_addr_domain_valid(u32 addr)
+char *tipc_nodeid2string(char *str, u8 *id)
{
- u32 n = tipc_node(addr);
- u32 c = tipc_cluster(addr);
- u32 z = tipc_zone(addr);
-
- if (n && (!z || !c))
- return 0;
- if (c && !z)
- return 0;
- return 1;
-}
+ int i;
+ u8 c;
-/**
- * tipc_addr_node_valid - validates a proposed network address for this node
- *
- * Accepts <Z.C.N>, where Z, C, and N are non-zero.
- *
- * Returns 1 if address can be used, otherwise 0
- */
-int tipc_addr_node_valid(u32 addr)
-{
- return tipc_addr_domain_valid(addr) && tipc_node(addr);
-}
+ /* Already a string ? */
+ for (i = 0; i < NODE_ID_LEN; i++) {
+ c = id[i];
+ if (c >= '0' && c <= '9')
+ continue;
+ if (c >= 'A' && c <= 'Z')
+ continue;
+ if (c >= 'a' && c <= 'z')
+ continue;
+ if (c == '.')
+ continue;
+ if (c == ':')
+ continue;
+ if (c == '_')
+ continue;
+ if (c == '-')
+ continue;
+ if (c == '@')
+ continue;
+ if (c != 0)
+ break;
+ }
+ if (i == NODE_ID_LEN) {
+ memcpy(str, id, NODE_ID_LEN);
+ str[NODE_ID_LEN] = 0;
+ return str;
+ }
-int tipc_in_scope(u32 domain, u32 addr)
-{
- if (!domain || (domain == addr))
- return 1;
- if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
- return 1;
- if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */
- return 1;
- return 0;
-}
+ /* Translate to hex string */
+ for (i = 0; i < NODE_ID_LEN; i++)
+ sprintf(&str[2 * i], "%02x", id[i]);
-/**
- * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
- */
-int tipc_addr_scope(u32 domain)
-{
- if (likely(!domain))
- return TIPC_ZONE_SCOPE;
- if (tipc_node(domain))
- return TIPC_NODE_SCOPE;
- if (tipc_cluster(domain))
- return TIPC_CLUSTER_SCOPE;
- return TIPC_ZONE_SCOPE;
-}
+ /* Strip off trailing zeroes */
+ for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--)
+ str[i] = 0;
-char *tipc_addr_string_fill(char *string, u32 addr)
-{
- snprintf(string, 16, "<%u.%u.%u>",
- tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
- return string;
+ return str;
}
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index bebb347803ce..31bee0ea7b3e 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -1,7 +1,7 @@
/*
* net/tipc/addr.h: Include file for TIPC address utility routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
* Copyright (c) 2004-2005, Wind River Systems
* All rights reserved.
*
@@ -45,14 +45,21 @@
static inline u32 tipc_own_addr(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ return tipc_net(net)->node_addr;
+}
+
+static inline u8 *tipc_own_id(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
- return tn->own_addr;
+ if (!strlen(tn->node_id_string))
+ return NULL;
+ return tn->node_id;
}
-static inline u32 tipc_zone_mask(u32 addr)
+static inline char *tipc_own_id_string(struct net *net)
{
- return addr & TIPC_ZONE_MASK;
+ return tipc_net(net)->node_id_string;
}
static inline u32 tipc_cluster_mask(u32 addr)
@@ -60,15 +67,25 @@ static inline u32 tipc_cluster_mask(u32 addr)
return addr & TIPC_ZONE_CLUSTER_MASK;
}
-u32 tipc_own_addr(struct net *net);
-int in_own_cluster(struct net *net, u32 addr);
-int in_own_cluster_exact(struct net *net, u32 addr);
-int in_own_node(struct net *net, u32 addr);
-u32 addr_domain(struct net *net, u32 sc);
-int tipc_addr_domain_valid(u32);
-int tipc_addr_node_valid(u32 addr);
-int tipc_in_scope(u32 domain, u32 addr);
-int tipc_addr_scope(u32 domain);
-char *tipc_addr_string_fill(char *string, u32 addr);
+static inline int tipc_node2scope(u32 node)
+{
+ return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE;
+}
+
+static inline int tipc_scope2node(struct net *net, int sc)
+{
+ return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net);
+}
+
+static inline int in_own_node(struct net *net, u32 addr)
+{
+ return addr == tipc_own_addr(net) || !addr;
+}
+
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
+void tipc_set_node_id(struct net *net, u8 *id);
+void tipc_set_node_addr(struct net *net, u32 addr);
+char *tipc_nodeid2string(char *str, u8 *id);
+u32 tipc_node_id2hash(u8 *id128);
#endif
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 37892b3909af..f3711176be45 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -574,5 +574,5 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
{
tipc_dest_list_purge(&nl->list);
nl->remote = 0;
- nl->local = 0;
+ nl->local = false;
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 3e3dce3d4c63..f7d47c89d658 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -210,7 +210,7 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (b)
- tipc_disc_add_dest(b->link_req);
+ tipc_disc_add_dest(b->disc);
rcu_read_unlock();
}
@@ -222,7 +222,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (b)
- tipc_disc_remove_dest(b->link_req);
+ tipc_disc_remove_dest(b->disc);
rcu_read_unlock();
}
@@ -230,88 +230,67 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
* tipc_enable_bearer - enable bearer with the given name
*/
static int tipc_enable_bearer(struct net *net, const char *name,
- u32 disc_domain, u32 priority,
+ u32 disc_domain, u32 prio,
struct nlattr *attr[])
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_bearer_names b_names;
+ int with_this_prio = 1;
struct tipc_bearer *b;
struct tipc_media *m;
- struct tipc_bearer_names b_names;
struct sk_buff *skb;
- char addr_string[16];
- u32 bearer_id;
- u32 with_this_prio;
- u32 i;
+ int bearer_id = 0;
int res = -EINVAL;
+ char *errstr = "";
- if (!tn->own_addr) {
- pr_warn("Bearer <%s> rejected, not supported in standalone mode\n",
- name);
- return -ENOPROTOOPT;
- }
if (!bearer_name_validate(name, &b_names)) {
- pr_warn("Bearer <%s> rejected, illegal name\n", name);
- return -EINVAL;
- }
- if (tipc_addr_domain_valid(disc_domain) &&
- (disc_domain != tn->own_addr)) {
- if (tipc_in_scope(disc_domain, tn->own_addr)) {
- disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK;
- res = 0; /* accept any node in own cluster */
- } else if (in_own_cluster_exact(net, disc_domain))
- res = 0; /* accept specified node in own cluster */
+ errstr = "illegal name";
+ goto rejected;
}
- if (res) {
- pr_warn("Bearer <%s> rejected, illegal discovery domain\n",
- name);
- return -EINVAL;
- }
- if ((priority > TIPC_MAX_LINK_PRI) &&
- (priority != TIPC_MEDIA_LINK_PRI)) {
- pr_warn("Bearer <%s> rejected, illegal priority\n", name);
- return -EINVAL;
+
+ if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
+ errstr = "illegal priority";
+ goto rejected;
}
m = tipc_media_find(b_names.media_name);
if (!m) {
- pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
- name, b_names.media_name);
- return -EINVAL;
+ errstr = "media not registered";
+ goto rejected;
}
- if (priority == TIPC_MEDIA_LINK_PRI)
- priority = m->priority;
+ if (prio == TIPC_MEDIA_LINK_PRI)
+ prio = m->priority;
-restart:
- bearer_id = MAX_BEARERS;
- with_this_prio = 1;
- for (i = MAX_BEARERS; i-- != 0; ) {
- b = rtnl_dereference(tn->bearer_list[i]);
- if (!b) {
- bearer_id = i;
- continue;
- }
+ /* Check new bearer vs existing ones and find free bearer id if any */
+ while (bearer_id < MAX_BEARERS) {
+ b = rtnl_dereference(tn->bearer_list[bearer_id]);
+ if (!b)
+ break;
if (!strcmp(name, b->name)) {
- pr_warn("Bearer <%s> rejected, already enabled\n",
- name);
- return -EINVAL;
+ errstr = "already enabled";
+ goto rejected;
}
- if ((b->priority == priority) &&
- (++with_this_prio > 2)) {
- if (priority-- == 0) {
- pr_warn("Bearer <%s> rejected, duplicate priority\n",
- name);
- return -EINVAL;
- }
- pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
- name, priority + 1, priority);
- goto restart;
+ bearer_id++;
+ if (b->priority != prio)
+ continue;
+ if (++with_this_prio <= 2)
+ continue;
+ pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
+ name, prio);
+ if (prio == TIPC_MIN_LINK_PRI) {
+ errstr = "cannot adjust to lower";
+ goto rejected;
}
+ pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
+ prio--;
+ bearer_id = 0;
+ with_this_prio = 1;
}
+
if (bearer_id >= MAX_BEARERS) {
- pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
- name, MAX_BEARERS);
- return -EINVAL;
+ errstr = "max 3 bearers permitted";
+ goto rejected;
}
b = kzalloc(sizeof(*b), GFP_ATOMIC);
@@ -322,10 +301,9 @@ restart:
b->media = m;
res = m->enable_media(net, b, attr);
if (res) {
- pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
- name, -res);
kfree(b);
- return -EINVAL;
+ errstr = "failed to enable media";
+ goto rejected;
}
b->identity = bearer_id;
@@ -333,15 +311,15 @@ restart:
b->window = m->window;
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
- b->priority = priority;
+ b->priority = prio;
test_and_set_bit_lock(0, &b->up);
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
bearer_disable(net, b);
- pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
- name);
- return -EINVAL;
+ kfree(b);
+ errstr = "failed to create discoverer";
+ goto rejected;
}
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
@@ -353,9 +331,11 @@ restart:
return -ENOMEM;
}
- pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
- name,
- tipc_addr_string_fill(addr_string, disc_domain), priority);
+ pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
+
+ return res;
+rejected:
+ pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr);
return res;
}
@@ -385,8 +365,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
tipc_node_delete_links(net, bearer_id);
b->media->disable_media(b);
RCU_INIT_POINTER(b->media_ptr, NULL);
- if (b->link_req)
- tipc_disc_delete(b->link_req);
+ if (b->disc)
+ tipc_disc_delete(b->disc);
RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
kfree_rcu(b, rcu);
tipc_mon_delete(net, bearer_id);
@@ -395,11 +375,13 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
struct nlattr *attr[])
{
+ char *dev_name = strchr((const char *)b->name, ':') + 1;
+ int hwaddr_len = b->media->hwaddr_len;
+ u8 node_id[NODE_ID_LEN] = {0,};
struct net_device *dev;
- char *driver_name = strchr((const char *)b->name, ':') + 1;
/* Find device with specified name */
- dev = dev_get_by_name(net, driver_name);
+ dev = dev_get_by_name(net, dev_name);
if (!dev)
return -ENODEV;
if (tipc_mtu_bad(dev, 0)) {
@@ -407,6 +389,16 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
return -EINVAL;
}
+ /* Autoconfigure own node identity if needed */
+ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) {
+ memcpy(node_id, dev->dev_addr, hwaddr_len);
+ tipc_net_init(net, node_id, 0);
+ }
+ if (!tipc_own_id(net)) {
+ pr_warn("Failed to obtain node identity\n");
+ return -EINVAL;
+ }
+
/* Associate TIPC bearer with L2 bearer */
rcu_assign_pointer(b->media_ptr, dev);
b->pt.dev = dev;
@@ -414,7 +406,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
b->pt.func = tipc_l2_rcv_msg;
dev_add_pack(&b->pt);
memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
- memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
+ memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len);
b->bcast_addr.media_id = b->media->type_id;
b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
b->mtu = dev->mtu;
@@ -861,12 +853,10 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
char *bearer;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- u32 domain;
+ u32 domain = 0;
u32 prio;
prio = TIPC_MEDIA_LINK_PRI;
- domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK;
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
@@ -956,11 +946,11 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
{
- int err;
- char *name;
struct tipc_bearer *b;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
+ char *name;
+ int err;
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
@@ -987,8 +977,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- if (props[TIPC_NLA_PROP_TOL])
+ if (props[TIPC_NLA_PROP_TOL]) {
b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+ tipc_node_apply_tolerance(net, b);
+ }
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index a53613d95bc9..6efcee63a381 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -159,7 +159,7 @@ struct tipc_bearer {
u32 tolerance;
u32 domain;
u32 identity;
- struct tipc_link_req *link_req;
+ struct tipc_discoverer *disc;
char net_plane;
unsigned long up;
};
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 0b982d048fb9..5b38f5164281 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -56,7 +56,11 @@ static int __net_init tipc_init_net(struct net *net)
int err;
tn->net_id = 4711;
- tn->own_addr = 0;
+ tn->node_addr = 0;
+ tn->trial_addr = 0;
+ tn->addr_trial_end = 0;
+ memset(tn->node_id, 0, sizeof(tn->node_id));
+ memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
get_random_bytes(&tn->random, sizeof(int));
INIT_LIST_HEAD(&tn->node_list);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 20b21af2ff14..8020a6c360ff 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -1,7 +1,7 @@
/*
* net/tipc/core.h: Include file for TIPC global declarations
*
- * Copyright (c) 2005-2006, 2013 Ericsson AB
+ * Copyright (c) 2005-2006, 2013-2018 Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -58,13 +58,14 @@
#include <linux/etherdevice.h>
#include <net/netns/generic.h>
#include <linux/rhashtable.h>
+#include <net/genetlink.h>
struct tipc_node;
struct tipc_bearer;
struct tipc_bc_base;
struct tipc_link;
struct tipc_name_table;
-struct tipc_server;
+struct tipc_topsrv;
struct tipc_monitor;
#define TIPC_MOD_VER "2.0.0"
@@ -72,15 +73,22 @@ struct tipc_monitor;
#define NODE_HTABLE_SIZE 512
#define MAX_BEARERS 3
#define TIPC_DEF_MON_THRESHOLD 32
+#define NODE_ID_LEN 16
+#define NODE_ID_STR_LEN (NODE_ID_LEN * 2 + 1)
extern unsigned int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
extern int sysctl_tipc_named_timeout __read_mostly;
struct tipc_net {
- u32 own_addr;
+ u8 node_id[NODE_ID_LEN];
+ u32 node_addr;
+ u32 trial_addr;
+ unsigned long addr_trial_end;
+ char node_id_string[NODE_ID_STR_LEN];
int net_id;
int random;
+ bool legacy_addr_format;
/* Node table and node list */
spinlock_t node_list_lock;
@@ -112,7 +120,7 @@ struct tipc_net {
struct list_head dist_queue;
/* Topology subscription server */
- struct tipc_server *topsrv;
+ struct tipc_topsrv *topsrv;
atomic_t subscription_count;
};
@@ -131,7 +139,12 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
-static inline struct tipc_server *tipc_topsrv(struct net *net)
+static inline struct name_table *tipc_name_table(struct net *net)
+{
+ return tipc_net(net)->nametbl;
+}
+
+static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
{
return tipc_net(net)->topsrv;
}
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
new file mode 100644
index 000000000000..aaabb0b776dd
--- /dev/null
+++ b/net/tipc/diag.c
@@ -0,0 +1,114 @@
+/*
+ * net/tipc/diag.c: TIPC socket diag
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "ASIS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "socket.h"
+#include <linux/sock_diag.h>
+#include <linux/tipc_sockets_diag.h>
+
+static u64 __tipc_diag_gen_cookie(struct sock *sk)
+{
+ u32 res[2];
+
+ sock_diag_save_cookie(sk, res);
+ return *((u64 *)res);
+}
+
+static int __tipc_add_sock_diag(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk)
+{
+ struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh);
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0,
+ NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ err = tipc_sk_fill_sock_diag(skb, cb, tsk, req->tidiag_states,
+ __tipc_diag_gen_cookie);
+ if (err)
+ return err;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
+static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag);
+}
+
+static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
+ struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct tipc_sock_diag_req);
+ struct net *net = sock_net(skb->sk);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = tipc_diag_dump,
+ };
+ netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler tipc_sock_diag_handler = {
+ .family = AF_TIPC,
+ .dump = tipc_sock_diag_handler_dump,
+};
+
+static int __init tipc_diag_init(void)
+{
+ return sock_diag_register(&tipc_sock_diag_handler);
+}
+
+static void __exit tipc_diag_exit(void)
+{
+ sock_diag_unregister(&tipc_sock_diag_handler);
+}
+
+module_init(tipc_diag_init);
+module_exit(tipc_diag_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 92e4828c6b09..9f666e0650e2 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -1,7 +1,7 @@
/*
* net/tipc/discover.c
*
- * Copyright (c) 2003-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2003-2006, 2014-2018, Ericsson AB
* Copyright (c) 2005-2006, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -39,34 +39,34 @@
#include "discover.h"
/* min delay during bearer start up */
-#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125)
+#define TIPC_DISC_INIT msecs_to_jiffies(125)
/* max delay if bearer has no links */
-#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000)
+#define TIPC_DISC_FAST msecs_to_jiffies(1000)
/* max delay if bearer has links */
-#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000)
+#define TIPC_DISC_SLOW msecs_to_jiffies(60000)
/* indicates no timer in use */
-#define TIPC_LINK_REQ_INACTIVE 0xffffffff
+#define TIPC_DISC_INACTIVE 0xffffffff
/**
- * struct tipc_link_req - information about an ongoing link setup request
+ * struct tipc_discoverer - information about an ongoing link setup request
* @bearer_id: identity of bearer issuing requests
* @net: network namespace instance
* @dest: destination address for request messages
* @domain: network domain to which links can be established
* @num_nodes: number of nodes currently discovered (i.e. with an active link)
* @lock: spinlock for controlling access to requests
- * @buf: request message to be (repeatedly) sent
+ * @skb: request message to be (repeatedly) sent
* @timer: timer governing period between requests
* @timer_intv: current interval between requests (in ms)
*/
-struct tipc_link_req {
+struct tipc_discoverer {
u32 bearer_id;
struct tipc_media_addr dest;
struct net *net;
u32 domain;
int num_nodes;
spinlock_t lock;
- struct sk_buff *buf;
+ struct sk_buff *skb;
struct timer_list timer;
unsigned long timer_intv;
};
@@ -77,22 +77,42 @@ struct tipc_link_req {
* @type: message type (request or response)
* @b: ptr to bearer issuing message
*/
-static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
- struct tipc_bearer *b)
+static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
+ u32 mtyp, struct tipc_bearer *b)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_msg *msg;
+ struct tipc_net *tn = tipc_net(net);
u32 dest_domain = b->domain;
+ struct tipc_msg *hdr;
- msg = buf_msg(buf);
- tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type,
+ hdr = buf_msg(skb);
+ tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp,
MAX_H_SIZE, dest_domain);
- msg_set_non_seq(msg, 1);
- msg_set_node_sig(msg, tn->random);
- msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES);
- msg_set_dest_domain(msg, dest_domain);
- msg_set_bc_netid(msg, tn->net_id);
- b->media->addr2msg(msg_media_addr(msg), &b->addr);
+ msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN);
+ msg_set_non_seq(hdr, 1);
+ msg_set_node_sig(hdr, tn->random);
+ msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES);
+ msg_set_dest_domain(hdr, dest_domain);
+ msg_set_bc_netid(hdr, tn->net_id);
+ b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+ msg_set_node_id(hdr, tipc_own_id(net));
+}
+
+static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst,
+ u32 src, u32 sugg_addr,
+ struct tipc_media_addr *maddr,
+ struct tipc_bearer *b)
+{
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+
+ skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
+ if (!skb)
+ return;
+ hdr = buf_msg(skb);
+ tipc_disc_init_msg(net, skb, mtyp, b);
+ msg_set_sugg_node_addr(hdr, sugg_addr);
+ msg_set_dest_domain(hdr, dst);
+ tipc_bearer_xmit_skb(net, b->identity, skb, maddr);
}
/**
@@ -104,161 +124,207 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
struct tipc_media_addr *media_addr)
{
- char node_addr_str[16];
char media_addr_str[64];
- tipc_addr_string_fill(node_addr_str, node_addr);
tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str),
media_addr);
- pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str,
+ pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr,
media_addr_str, b->name);
}
+/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer
+ */
+static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
+ struct tipc_media_addr *maddr,
+ struct tipc_bearer *b,
+ u32 dst, u32 src,
+ u32 sugg_addr,
+ u8 *peer_id,
+ int mtyp)
+{
+ struct net *net = d->net;
+ struct tipc_net *tn = tipc_net(net);
+ bool trial = time_before(jiffies, tn->addr_trial_end);
+ u32 self = tipc_own_addr(net);
+
+ if (mtyp == DSC_TRIAL_FAIL_MSG) {
+ if (!trial)
+ return true;
+
+ /* Ignore if somebody else already gave new suggestion */
+ if (dst != tn->trial_addr)
+ return true;
+
+ /* Otherwise update trial address and restart trial period */
+ tn->trial_addr = sugg_addr;
+ msg_set_prevnode(buf_msg(d->skb), sugg_addr);
+ tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+ return true;
+ }
+
+ /* Apply trial address if we just left trial period */
+ if (!trial && !self) {
+ tipc_net_finalize(net, tn->trial_addr);
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ }
+
+ if (mtyp != DSC_TRIAL_MSG)
+ return false;
+
+ sugg_addr = tipc_node_try_addr(net, peer_id, src);
+ if (sugg_addr)
+ tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src,
+ self, sugg_addr, maddr, b);
+ return true;
+}
+
/**
* tipc_disc_rcv - handle incoming discovery message (request or response)
- * @net: the applicable net namespace
- * @buf: buffer containing message
- * @bearer: bearer that message arrived on
+ * @net: applicable net namespace
+ * @skb: buffer containing message
+ * @b: bearer that message arrived on
*/
void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
- struct tipc_bearer *bearer)
+ struct tipc_bearer *b)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_media_addr maddr;
- struct sk_buff *rskb;
+ struct tipc_net *tn = tipc_net(net);
struct tipc_msg *hdr = buf_msg(skb);
- u32 ddom = msg_dest_domain(hdr);
- u32 onode = msg_prevnode(hdr);
+ u16 caps = msg_node_capabilities(hdr);
+ bool legacy = tn->legacy_addr_format;
+ u32 sugg = msg_sugg_node_addr(hdr);
+ u32 signature = msg_node_sig(hdr);
+ u8 peer_id[NODE_ID_LEN] = {0,};
+ u32 dst = msg_dest_domain(hdr);
u32 net_id = msg_bc_netid(hdr);
+ struct tipc_media_addr maddr;
+ u32 src = msg_prevnode(hdr);
u32 mtyp = msg_type(hdr);
- u32 signature = msg_node_sig(hdr);
- u16 caps = msg_node_capabilities(hdr);
- bool respond = false;
bool dupl_addr = false;
+ bool respond = false;
+ u32 self;
int err;
- err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
- kfree_skb(skb);
- if (err)
- return;
+ skb_linearize(skb);
+ hdr = buf_msg(skb);
- /* Ensure message from node is valid and communication is permitted */
- if (net_id != tn->net_id)
+ if (caps & TIPC_NODE_ID128)
+ memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN);
+ else
+ sprintf(peer_id, "%x", src);
+
+ err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr));
+ kfree_skb(skb);
+ if (err || maddr.broadcast) {
+ pr_warn_ratelimited("Rcv corrupt discovery message\n");
return;
- if (maddr.broadcast)
+ }
+ /* Ignore discovery messages from own node */
+ if (!memcmp(&maddr, &b->addr, sizeof(maddr)))
return;
- if (!tipc_addr_domain_valid(ddom))
+ if (net_id != tn->net_id)
return;
- if (!tipc_addr_node_valid(onode))
+ if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst,
+ src, sugg, peer_id, mtyp))
return;
+ self = tipc_own_addr(net);
- if (in_own_node(net, onode)) {
- if (memcmp(&maddr, &bearer->addr, sizeof(maddr)))
- disc_dupl_alert(bearer, tn->own_addr, &maddr);
+ /* Message from somebody using this node's address */
+ if (in_own_node(net, src)) {
+ disc_dupl_alert(b, self, &maddr);
return;
}
- if (!tipc_in_scope(ddom, tn->own_addr))
+ if (!tipc_in_scope(legacy, dst, self))
return;
- if (!tipc_in_scope(bearer->domain, onode))
+ if (!tipc_in_scope(legacy, b->domain, src))
return;
-
- tipc_node_check_dest(net, onode, bearer, caps, signature,
+ tipc_node_check_dest(net, src, peer_id, b, caps, signature,
&maddr, &respond, &dupl_addr);
if (dupl_addr)
- disc_dupl_alert(bearer, onode, &maddr);
-
- /* Send response, if necessary */
- if (respond && (mtyp == DSC_REQ_MSG)) {
- rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
- if (!rskb)
- return;
- tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
- tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr);
- }
+ disc_dupl_alert(b, src, &maddr);
+ if (!respond)
+ return;
+ if (mtyp != DSC_REQ_MSG)
+ return;
+ tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b);
}
-/**
- * disc_update - update frequency of periodic link setup requests
- * @req: ptr to link request structure
- *
- * Reinitiates discovery process if discovery object has no associated nodes
- * and is either not currently searching or is searching at a slow rate
+/* tipc_disc_add_dest - increment set of discovered nodes
*/
-static void disc_update(struct tipc_link_req *req)
+void tipc_disc_add_dest(struct tipc_discoverer *d)
{
- if (!req->num_nodes) {
- if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) ||
- (req->timer_intv > TIPC_LINK_REQ_FAST)) {
- req->timer_intv = TIPC_LINK_REQ_INIT;
- mod_timer(&req->timer, jiffies + req->timer_intv);
- }
- }
+ spin_lock_bh(&d->lock);
+ d->num_nodes++;
+ spin_unlock_bh(&d->lock);
}
-/**
- * tipc_disc_add_dest - increment set of discovered nodes
- * @req: ptr to link request structure
+/* tipc_disc_remove_dest - decrement set of discovered nodes
*/
-void tipc_disc_add_dest(struct tipc_link_req *req)
+void tipc_disc_remove_dest(struct tipc_discoverer *d)
{
- spin_lock_bh(&req->lock);
- req->num_nodes++;
- spin_unlock_bh(&req->lock);
-}
+ int intv, num;
-/**
- * tipc_disc_remove_dest - decrement set of discovered nodes
- * @req: ptr to link request structure
- */
-void tipc_disc_remove_dest(struct tipc_link_req *req)
-{
- spin_lock_bh(&req->lock);
- req->num_nodes--;
- disc_update(req);
- spin_unlock_bh(&req->lock);
+ spin_lock_bh(&d->lock);
+ d->num_nodes--;
+ num = d->num_nodes;
+ intv = d->timer_intv;
+ if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST)) {
+ d->timer_intv = TIPC_DISC_INIT;
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ }
+ spin_unlock_bh(&d->lock);
}
-/**
- * disc_timeout - send a periodic link setup request
- * @data: ptr to link request structure
- *
+/* tipc_disc_timeout - send a periodic link setup request
* Called whenever a link setup request timer associated with a bearer expires.
+ * - Keep doubling time between sent request until limit is reached;
+ * - Hold at fast polling rate if we don't have any associated nodes
+ * - Otherwise hold at slow polling rate
*/
-static void disc_timeout(struct timer_list *t)
+static void tipc_disc_timeout(struct timer_list *t)
{
- struct tipc_link_req *req = from_timer(req, t, timer);
- struct sk_buff *skb;
- int max_delay;
+ struct tipc_discoverer *d = from_timer(d, t, timer);
+ struct tipc_net *tn = tipc_net(d->net);
+ u32 self = tipc_own_addr(d->net);
+ struct tipc_media_addr maddr;
+ struct sk_buff *skb = NULL;
+ struct net *net = d->net;
+ u32 bearer_id;
- spin_lock_bh(&req->lock);
+ spin_lock_bh(&d->lock);
/* Stop searching if only desired node has been found */
- if (tipc_node(req->domain) && req->num_nodes) {
- req->timer_intv = TIPC_LINK_REQ_INACTIVE;
+ if (tipc_node(d->domain) && d->num_nodes) {
+ d->timer_intv = TIPC_DISC_INACTIVE;
goto exit;
}
- /*
- * Send discovery message, then update discovery timer
- *
- * Keep doubling time between requests until limit is reached;
- * hold at fast polling rate if don't have any associated nodes,
- * otherwise hold at slow polling rate
- */
- skb = skb_clone(req->buf, GFP_ATOMIC);
- if (skb)
- tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest);
- req->timer_intv *= 2;
- if (req->num_nodes)
- max_delay = TIPC_LINK_REQ_SLOW;
- else
- max_delay = TIPC_LINK_REQ_FAST;
- if (req->timer_intv > max_delay)
- req->timer_intv = max_delay;
+ /* Did we just leave the address trial period ? */
+ if (!self && !time_before(jiffies, tn->addr_trial_end)) {
+ self = tn->trial_addr;
+ tipc_net_finalize(net, self);
+ msg_set_prevnode(buf_msg(d->skb), self);
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ }
+
+ /* Adjust timeout interval according to discovery phase */
+ if (time_before(jiffies, tn->addr_trial_end)) {
+ d->timer_intv = TIPC_DISC_INIT;
+ } else {
+ d->timer_intv *= 2;
+ if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW)
+ d->timer_intv = TIPC_DISC_SLOW;
+ else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
+ d->timer_intv = TIPC_DISC_FAST;
+ }
- mod_timer(&req->timer, jiffies + req->timer_intv);
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ memcpy(&maddr, &d->dest, sizeof(maddr));
+ skb = skb_clone(d->skb, GFP_ATOMIC);
+ bearer_id = d->bearer_id;
exit:
- spin_unlock_bh(&req->lock);
+ spin_unlock_bh(&d->lock);
+ if (skb)
+ tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr);
}
/**
@@ -273,41 +339,47 @@ exit:
int tipc_disc_create(struct net *net, struct tipc_bearer *b,
struct tipc_media_addr *dest, struct sk_buff **skb)
{
- struct tipc_link_req *req;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_discoverer *d;
- req = kmalloc(sizeof(*req), GFP_ATOMIC);
- if (!req)
+ d = kmalloc(sizeof(*d), GFP_ATOMIC);
+ if (!d)
return -ENOMEM;
- req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
- if (!req->buf) {
- kfree(req);
+ d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
+ if (!d->skb) {
+ kfree(d);
return -ENOMEM;
}
+ tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
- tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
- memcpy(&req->dest, dest, sizeof(*dest));
- req->net = net;
- req->bearer_id = b->identity;
- req->domain = b->domain;
- req->num_nodes = 0;
- req->timer_intv = TIPC_LINK_REQ_INIT;
- spin_lock_init(&req->lock);
- timer_setup(&req->timer, disc_timeout, 0);
- mod_timer(&req->timer, jiffies + req->timer_intv);
- b->link_req = req;
- *skb = skb_clone(req->buf, GFP_ATOMIC);
+ /* Do we need an address trial period first ? */
+ if (!tipc_own_addr(net)) {
+ tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+ msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG);
+ }
+ memcpy(&d->dest, dest, sizeof(*dest));
+ d->net = net;
+ d->bearer_id = b->identity;
+ d->domain = b->domain;
+ d->num_nodes = 0;
+ d->timer_intv = TIPC_DISC_INIT;
+ spin_lock_init(&d->lock);
+ timer_setup(&d->timer, tipc_disc_timeout, 0);
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ b->disc = d;
+ *skb = skb_clone(d->skb, GFP_ATOMIC);
return 0;
}
/**
* tipc_disc_delete - destroy object sending periodic link setup requests
- * @req: ptr to link request structure
+ * @d: ptr to link duest structure
*/
-void tipc_disc_delete(struct tipc_link_req *req)
+void tipc_disc_delete(struct tipc_discoverer *d)
{
- del_timer_sync(&req->timer);
- kfree_skb(req->buf);
- kfree(req);
+ del_timer_sync(&d->timer);
+ kfree_skb(d->skb);
+ kfree(d);
}
/**
@@ -318,19 +390,21 @@ void tipc_disc_delete(struct tipc_link_req *req)
*/
void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
{
- struct tipc_link_req *req = b->link_req;
+ struct tipc_discoverer *d = b->disc;
+ struct tipc_media_addr maddr;
struct sk_buff *skb;
- spin_lock_bh(&req->lock);
- tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
- req->net = net;
- req->bearer_id = b->identity;
- req->domain = b->domain;
- req->num_nodes = 0;
- req->timer_intv = TIPC_LINK_REQ_INIT;
- mod_timer(&req->timer, jiffies + req->timer_intv);
- skb = skb_clone(req->buf, GFP_ATOMIC);
+ spin_lock_bh(&d->lock);
+ tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
+ d->net = net;
+ d->bearer_id = b->identity;
+ d->domain = b->domain;
+ d->num_nodes = 0;
+ d->timer_intv = TIPC_DISC_INIT;
+ memcpy(&maddr, &d->dest, sizeof(maddr));
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ skb = skb_clone(d->skb, GFP_ATOMIC);
+ spin_unlock_bh(&d->lock);
if (skb)
- tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
- spin_unlock_bh(&req->lock);
+ tipc_bearer_xmit_skb(net, b->identity, skb, &maddr);
}
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index b80a335389c0..521d96c41dfd 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -37,14 +37,14 @@
#ifndef _TIPC_DISCOVER_H
#define _TIPC_DISCOVER_H
-struct tipc_link_req;
+struct tipc_discoverer;
int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
struct tipc_media_addr *dest, struct sk_buff **skb);
-void tipc_disc_delete(struct tipc_link_req *req);
+void tipc_disc_delete(struct tipc_discoverer *req);
void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr);
-void tipc_disc_add_dest(struct tipc_link_req *req);
-void tipc_disc_remove_dest(struct tipc_link_req *req);
+void tipc_disc_add_dest(struct tipc_discoverer *req);
+void tipc_disc_remove_dest(struct tipc_discoverer *req);
void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
struct tipc_bearer *b_ptr);
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 122162a31816..d7a7befeddd4 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -37,7 +37,7 @@
#include "addr.h"
#include "group.h"
#include "bcast.h"
-#include "server.h"
+#include "topsrv.h"
#include "msg.h"
#include "socket.h"
#include "node.h"
@@ -189,6 +189,7 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid,
grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
grp->open = group_is_open;
+ *grp->open = false;
filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE;
if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0,
filter, &grp->subid))
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2d6b2aed30e0..695acb783969 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -434,14 +434,16 @@ char *tipc_link_name(struct tipc_link *l)
*/
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
- int window, u32 session, u32 ownnode, u32 peer,
- u16 peer_caps,
+ int window, u32 session, u32 self,
+ u32 peer, u8 *peer_id, u16 peer_caps,
struct tipc_link *bc_sndlink,
struct tipc_link *bc_rcvlink,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link **link)
{
+ char peer_str[NODE_ID_STR_LEN] = {0,};
+ char self_str[NODE_ID_STR_LEN] = {0,};
struct tipc_link *l;
l = kzalloc(sizeof(*l), GFP_ATOMIC);
@@ -450,10 +452,19 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
*link = l;
l->session = session;
- /* Note: peer i/f name is completed by reset/activate message */
- sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
- tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
- if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
+ /* Set link name for unicast links only */
+ if (peer_id) {
+ tipc_nodeid2string(self_str, tipc_own_id(net));
+ if (strlen(self_str) > 16)
+ sprintf(self_str, "%x", self);
+ tipc_nodeid2string(peer_str, peer_id);
+ if (strlen(peer_str) > 16)
+ sprintf(peer_str, "%x", peer);
+ }
+ /* Peer i/f name will be completed by reset/activate message */
+ snprintf(l->name, sizeof(l->name), "%s:%s-%s:unknown",
+ self_str, if_name, peer_str);
+
strcpy(l->if_name, if_name);
l->addr = peer;
l->peer_caps = peer_caps;
@@ -501,7 +512,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
struct tipc_link *l;
if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
- 0, ownnode, peer, peer_caps, bc_sndlink,
+ 0, ownnode, peer, NULL, peer_caps, bc_sndlink,
NULL, inputq, namedq, link))
return false;
@@ -1800,7 +1811,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
{
- int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE);
+ int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE);
l->window = win;
l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win);
@@ -1938,11 +1949,11 @@ msg_full:
int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
struct tipc_link *link, int nlflags)
{
- int err;
- void *hdr;
+ u32 self = tipc_own_addr(net);
struct nlattr *attrs;
struct nlattr *prop;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ void *hdr;
+ int err;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
nlflags, TIPC_NL_LINK_GET);
@@ -1955,8 +1966,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST,
- tipc_cluster_mask(tn->own_addr)))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self)))
goto attr_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
goto attr_msg_full;
@@ -2126,7 +2136,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
+ if (link_is_up(l))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
diff --git a/net/tipc/link.h b/net/tipc/link.h
index d1bd1787a768..ec59348a81e8 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -73,8 +73,8 @@ enum {
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
- int window, u32 session, u32 ownnode, u32 peer,
- u16 peer_caps,
+ int window, u32 session, u32 ownnode,
+ u32 peer, u8 *peer_id, u16 peer_caps,
struct tipc_link *bc_sndlink,
struct tipc_link *bc_rcvlink,
struct sk_buff_head *inputq,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 4e1c6f6450bb..b6c45dccba3d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -580,7 +580,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
msg = buf_msg(skb);
if (msg_reroute_cnt(msg))
return false;
- dnode = addr_domain(net, msg_lookup_scope(msg));
+ dnode = tipc_scope2node(net, msg_lookup_scope(msg));
dport = tipc_nametbl_translate(net, msg_nametype(msg),
msg_nameinst(msg), &dnode);
if (!dport)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index b4ba1b4f9ae7..a4e944d59394 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -550,6 +550,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
*/
#define DSC_REQ_MSG 0
#define DSC_RESP_MSG 1
+#define DSC_TRIAL_MSG 2
+#define DSC_TRIAL_FAIL_MSG 3
/*
* Group protocol message types
@@ -627,7 +629,6 @@ static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n)
msg_set_bits(m, 2, 0, 0xffff, n);
}
-
/*
* Word 4
*/
@@ -925,6 +926,26 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
}
+static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
+{
+ return msg_word(m, 14);
+}
+
+static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n)
+{
+ msg_set_word(m, 14, n);
+}
+
+static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id)
+{
+ memcpy(msg_data(hdr), id, 16);
+}
+
+static inline u8 *msg_node_id(struct tipc_msg *hdr)
+{
+ return (u8 *)msg_data(hdr);
+}
+
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
bool tipc_msg_validate(struct sk_buff **_skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 23f8899e0f8c..51b4b96f89db 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
i->type = htonl(p->type);
i->lower = htonl(p->lower);
i->upper = htonl(p->upper);
- i->ref = htonl(p->ref);
+ i->port = htonl(p->port);
i->key = htonl(p->key);
}
@@ -68,14 +68,14 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
u32 dest)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
+ u32 self = tipc_own_addr(net);
struct tipc_msg *msg;
if (buf != NULL) {
msg = buf_msg(buf);
- tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type,
- INT_H_SIZE, dest);
+ tipc_msg_init(self, msg, NAME_DISTRIBUTOR,
+ type, INT_H_SIZE, dest);
msg_set_size(msg, INT_H_SIZE + size);
}
return buf;
@@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
*/
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff *buf;
+ struct name_table *nt = tipc_name_table(net);
struct distr_item *item;
+ struct sk_buff *skb;
- list_add_tail_rcu(&publ->local_list,
- &tn->nametbl->publ_list[publ->scope]);
-
- if (publ->scope == TIPC_NODE_SCOPE)
+ if (publ->scope == TIPC_NODE_SCOPE) {
+ list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
return NULL;
+ }
+ list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope);
- buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Publication distribution failure\n");
return NULL;
}
- item = (struct distr_item *)msg_data(buf_msg(buf));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
- return buf;
+ return skb;
}
/**
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
struct sk_buff *buf;
struct distr_item *item;
- list_del(&publ->local_list);
+ list_del(&publ->binding_node);
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
- list_for_each_entry(publ, pls, local_list) {
+ list_for_each_entry(publ, pls, binding_node) {
/* Prepare next buffer: */
if (!skb) {
skb = named_prepare_buf(net, PUBLICATION, msg_rem,
@@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
*/
void tipc_named_node_up(struct net *net, u32 dnode)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
struct sk_buff_head head;
__skb_queue_head_init(&head);
rcu_read_lock();
- named_distribute(net, &head, dnode,
- &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
- named_distribute(net, &head, dnode,
- &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
+ named_distribute(net, &head, dnode, &nt->cluster_scope);
rcu_read_unlock();
tipc_node_xmit(net, &head, dnode, 0);
@@ -207,20 +204,20 @@ void tipc_named_node_up(struct net *net, u32 dnode)
*/
static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
struct publication *p;
spin_lock_bh(&tn->nametbl_lock);
- p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
- publ->node, publ->ref, publ->key);
+ p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->upper,
+ publ->node, publ->key);
if (p)
- tipc_node_unsubscribe(net, &p->nodesub_list, addr);
+ tipc_node_unsubscribe(net, &p->binding_node, addr);
spin_unlock_bh(&tn->nametbl_lock);
if (p != publ) {
pr_err("Unable to remove publication from failed node\n"
- " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
- publ->type, publ->lower, publ->node, publ->ref,
+ " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
+ publ->type, publ->lower, publ->node, publ->port,
publ->key);
}
@@ -249,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
{
struct publication *publ, *tmp;
- list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list)
+ list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
tipc_publ_purge(net, publ, addr);
tipc_dist_queue_purge(net, addr);
}
@@ -264,28 +261,31 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
u32 node, u32 dtype)
{
- struct publication *publ = NULL;
+ struct publication *p = NULL;
+ u32 lower = ntohl(i->lower);
+ u32 upper = ntohl(i->upper);
+ u32 type = ntohl(i->type);
+ u32 port = ntohl(i->port);
+ u32 key = ntohl(i->key);
if (dtype == PUBLICATION) {
- publ = tipc_nametbl_insert_publ(net, ntohl(i->type),
- ntohl(i->lower),
- ntohl(i->upper),
- TIPC_CLUSTER_SCOPE, node,
- ntohl(i->ref), ntohl(i->key));
- if (publ) {
- tipc_node_subscribe(net, &publ->nodesub_list, node);
+ p = tipc_nametbl_insert_publ(net, type, lower, upper,
+ TIPC_CLUSTER_SCOPE, node,
+ port, key);
+ if (p) {
+ tipc_node_subscribe(net, &p->binding_node, node);
return true;
}
} else if (dtype == WITHDRAWAL) {
- publ = tipc_nametbl_remove_publ(net, ntohl(i->type),
- ntohl(i->lower),
- node, ntohl(i->ref),
- ntohl(i->key));
- if (publ) {
- tipc_node_unsubscribe(net, &publ->nodesub_list, node);
- kfree_rcu(publ, rcu);
+ p = tipc_nametbl_remove_publ(net, type, lower,
+ upper, node, key);
+ if (p) {
+ tipc_node_unsubscribe(net, &p->binding_node, node);
+ kfree_rcu(p, rcu);
return true;
}
+ pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n",
+ type, lower, node);
} else {
pr_warn("Unrecognized name table message received\n");
}
@@ -293,55 +293,6 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
}
/**
- * tipc_named_add_backlog - add a failed name table update to the backlog
- *
- */
-static void tipc_named_add_backlog(struct net *net, struct distr_item *i,
- u32 type, u32 node)
-{
- struct distr_queue_item *e;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- unsigned long now = get_jiffies_64();
-
- e = kzalloc(sizeof(*e), GFP_ATOMIC);
- if (!e)
- return;
- e->dtype = type;
- e->node = node;
- e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout);
- memcpy(e, i, sizeof(*i));
- list_add_tail(&e->next, &tn->dist_queue);
-}
-
-/**
- * tipc_named_process_backlog - try to process any pending name table updates
- * from the network.
- */
-void tipc_named_process_backlog(struct net *net)
-{
- struct distr_queue_item *e, *tmp;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- char addr[16];
- unsigned long now = get_jiffies_64();
-
- list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) {
- if (time_after(e->expires, now)) {
- if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype))
- continue;
- } else {
- tipc_addr_string_fill(addr, e->node);
- pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n",
- e->dtype, ntohl(e->i.type),
- ntohl(e->i.lower),
- ntohl(e->i.upper),
- addr, ntohl(e->i.key));
- }
- list_del(&e->next);
- kfree(e);
- }
-}
-
-/**
* tipc_named_rcv - process name table update messages sent by another node
*/
void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
@@ -363,12 +314,10 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
count = msg_data_sz(msg) / ITEM_SIZE;
node = msg_orignode(msg);
while (count--) {
- if (!tipc_update_nametbl(net, item, node, mtype))
- tipc_named_add_backlog(net, item, mtype, node);
+ tipc_update_nametbl(net, item, node, mtype);
item++;
}
kfree_skb(skb);
- tipc_named_process_backlog(net);
}
spin_unlock_bh(&tn->nametbl_lock);
}
@@ -382,16 +331,17 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
*/
void tipc_named_reinit(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
struct publication *publ;
- int scope;
+ u32 self = tipc_own_addr(net);
spin_lock_bh(&tn->nametbl_lock);
- for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
- list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope],
- local_list)
- publ->node = tn->own_addr;
+ list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
+ publ->node = self;
+ list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
+ publ->node = self;
spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 1264ba0af937..63fc73e0fa6c 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -63,7 +63,7 @@ struct distr_item {
__be32 type;
__be32 lower;
__be32 upper;
- __be32 ref;
+ __be32 port;
__be32 key;
};
@@ -72,7 +72,6 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
void tipc_named_node_up(struct net *net, u32 dnode);
void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
void tipc_named_reinit(struct net *net);
-void tipc_named_process_backlog(struct net *net);
void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ed0457cc99d6..b1fe20972aa9 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1,7 +1,7 @@
/*
* net/tipc/name_table.c: TIPC name table code
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2008, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -44,64 +44,40 @@
#include "addr.h"
#include "node.h"
#include "group.h"
-#include <net/genetlink.h>
-
-#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
/**
- * struct name_info - name sequence publication info
- * @node_list: circular list of publications made by own node
- * @cluster_list: circular list of publications made by own cluster
- * @zone_list: circular list of publications made by own zone
- * @node_list_size: number of entries in "node_list"
- * @cluster_list_size: number of entries in "cluster_list"
- * @zone_list_size: number of entries in "zone_list"
- *
- * Note: The zone list always contains at least one entry, since all
- * publications of the associated name sequence belong to it.
- * (The cluster and node lists may be empty.)
+ * struct service_range - container for all bindings of a service range
+ * @lower: service range lower bound
+ * @upper: service range upper bound
+ * @tree_node: member of service range RB tree
+ * @local_publ: list of identical publications made from this node
+ * Used by closest_first lookup and multicast lookup algorithm
+ * @all_publ: all publications identical to this one, whatever node and scope
+ * Used by round-robin lookup algorithm
*/
-struct name_info {
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
- u32 node_list_size;
- u32 cluster_list_size;
- u32 zone_list_size;
-};
-
-/**
- * struct sub_seq - container for all published instances of a name sequence
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @info: pointer to name sequence publication info
- */
-struct sub_seq {
+struct service_range {
u32 lower;
u32 upper;
- struct name_info *info;
+ struct rb_node tree_node;
+ struct list_head local_publ;
+ struct list_head all_publ;
};
/**
- * struct name_seq - container for all published instances of a name type
- * @type: 32 bit 'type' value for name sequence
- * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type';
- * sub-sequences are sorted in ascending order
- * @alloc: number of sub-sequences currently in array
- * @first_free: array index of first unused sub-sequence entry
- * @ns_list: links to adjacent name sequences in hash chain
- * @subscriptions: list of subscriptions for this 'type'
- * @lock: spinlock controlling access to publication lists of all sub-sequences
+ * struct tipc_service - container for all published instances of a service type
+ * @type: 32 bit 'type' value for service
+ * @ranges: rb tree containing all service ranges for this service
+ * @service_list: links to adjacent name ranges in hash chain
+ * @subscriptions: list of subscriptions for this service type
+ * @lock: spinlock controlling access to pertaining service ranges/publications
* @rcu: RCU callback head used for deferred freeing
*/
-struct name_seq {
+struct tipc_service {
u32 type;
- struct sub_seq *sseqs;
- u32 alloc;
- u32 first_free;
- struct hlist_node ns_list;
+ struct rb_root ranges;
+ struct hlist_node service_list;
struct list_head subscriptions;
- spinlock_t lock;
+ spinlock_t lock; /* Covers service range list */
struct rcu_head rcu;
};
@@ -111,494 +87,380 @@ static int hash(int x)
}
/**
- * publ_create - create a publication structure
+ * tipc_publ_create - create a publication structure
*/
-static struct publication *publ_create(u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 port_ref,
- u32 key)
+static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
+ u32 scope, u32 node, u32 port,
+ u32 key)
{
struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
- if (publ == NULL) {
- pr_warn("Publication creation failure, no memory\n");
+
+ if (!publ)
return NULL;
- }
publ->type = type;
publ->lower = lower;
publ->upper = upper;
publ->scope = scope;
publ->node = node;
- publ->ref = port_ref;
+ publ->port = port;
publ->key = key;
- INIT_LIST_HEAD(&publ->pport_list);
+ INIT_LIST_HEAD(&publ->binding_sock);
+ INIT_LIST_HEAD(&publ->binding_node);
+ INIT_LIST_HEAD(&publ->local_publ);
+ INIT_LIST_HEAD(&publ->all_publ);
return publ;
}
/**
- * tipc_subseq_alloc - allocate a specified number of sub-sequence structures
- */
-static struct sub_seq *tipc_subseq_alloc(u32 cnt)
-{
- return kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC);
-}
-
-/**
- * tipc_nameseq_create - create a name sequence structure for the specified 'type'
+ * tipc_service_create - create a service structure for the specified 'type'
*
- * Allocates a single sub-sequence structure and sets it to all 0's.
+ * Allocates a single range structure and sets it to all 0's.
*/
-static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head)
+static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
{
- struct name_seq *nseq = kzalloc(sizeof(*nseq), GFP_ATOMIC);
- struct sub_seq *sseq = tipc_subseq_alloc(1);
+ struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC);
- if (!nseq || !sseq) {
- pr_warn("Name sequence creation failed, no memory\n");
- kfree(nseq);
- kfree(sseq);
+ if (!service) {
+ pr_warn("Service creation failed, no memory\n");
return NULL;
}
- spin_lock_init(&nseq->lock);
- nseq->type = type;
- nseq->sseqs = sseq;
- nseq->alloc = 1;
- INIT_HLIST_NODE(&nseq->ns_list);
- INIT_LIST_HEAD(&nseq->subscriptions);
- hlist_add_head_rcu(&nseq->ns_list, seq_head);
- return nseq;
+ spin_lock_init(&service->lock);
+ service->type = type;
+ service->ranges = RB_ROOT;
+ INIT_HLIST_NODE(&service->service_list);
+ INIT_LIST_HEAD(&service->subscriptions);
+ hlist_add_head_rcu(&service->service_list, hd);
+ return service;
}
/**
- * nameseq_find_subseq - find sub-sequence (if any) matching a name instance
+ * tipc_service_find_range - find service range matching a service instance
*
- * Very time-critical, so binary searches through sub-sequence array.
+ * Very time-critical, so binary search through range rb tree
*/
-static struct sub_seq *nameseq_find_subseq(struct name_seq *nseq,
- u32 instance)
+static struct service_range *tipc_service_find_range(struct tipc_service *sc,
+ u32 instance)
{
- struct sub_seq *sseqs = nseq->sseqs;
- int low = 0;
- int high = nseq->first_free - 1;
- int mid;
-
- while (low <= high) {
- mid = (low + high) / 2;
- if (instance < sseqs[mid].lower)
- high = mid - 1;
- else if (instance > sseqs[mid].upper)
- low = mid + 1;
+ struct rb_node *n = sc->ranges.rb_node;
+ struct service_range *sr;
+
+ while (n) {
+ sr = container_of(n, struct service_range, tree_node);
+ if (sr->lower > instance)
+ n = n->rb_left;
+ else if (sr->upper < instance)
+ n = n->rb_right;
else
- return &sseqs[mid];
+ return sr;
}
return NULL;
}
-/**
- * nameseq_locate_subseq - determine position of name instance in sub-sequence
- *
- * Returns index in sub-sequence array of the entry that contains the specified
- * instance value; if no entry contains that value, returns the position
- * where a new entry for it would be inserted in the array.
- *
- * Note: Similar to binary search code for locating a sub-sequence.
- */
-static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance)
+static struct service_range *tipc_service_create_range(struct tipc_service *sc,
+ u32 lower, u32 upper)
{
- struct sub_seq *sseqs = nseq->sseqs;
- int low = 0;
- int high = nseq->first_free - 1;
- int mid;
-
- while (low <= high) {
- mid = (low + high) / 2;
- if (instance < sseqs[mid].lower)
- high = mid - 1;
- else if (instance > sseqs[mid].upper)
- low = mid + 1;
+ struct rb_node **n, *parent = NULL;
+ struct service_range *sr, *tmp;
+
+ n = &sc->ranges.rb_node;
+ while (*n) {
+ tmp = container_of(*n, struct service_range, tree_node);
+ parent = *n;
+ tmp = container_of(parent, struct service_range, tree_node);
+ if (lower < tmp->lower)
+ n = &(*n)->rb_left;
+ else if (lower > tmp->lower)
+ n = &(*n)->rb_right;
+ else if (upper < tmp->upper)
+ n = &(*n)->rb_left;
+ else if (upper > tmp->upper)
+ n = &(*n)->rb_right;
else
- return mid;
+ return tmp;
}
- return low;
+ sr = kzalloc(sizeof(*sr), GFP_ATOMIC);
+ if (!sr)
+ return NULL;
+ sr->lower = lower;
+ sr->upper = upper;
+ INIT_LIST_HEAD(&sr->local_publ);
+ INIT_LIST_HEAD(&sr->all_publ);
+ rb_link_node(&sr->tree_node, parent, n);
+ rb_insert_color(&sr->tree_node, &sc->ranges);
+ return sr;
}
-/**
- * tipc_nameseq_insert_publ
- */
-static struct publication *tipc_nameseq_insert_publ(struct net *net,
- struct name_seq *nseq,
+static struct publication *tipc_service_insert_publ(struct net *net,
+ struct tipc_service *sc,
u32 type, u32 lower,
u32 upper, u32 scope,
- u32 node, u32 port, u32 key)
+ u32 node, u32 port,
+ u32 key)
{
- struct tipc_subscription *s;
- struct tipc_subscription *st;
- struct publication *publ;
- struct sub_seq *sseq;
- struct name_info *info;
- int created_subseq = 0;
-
- sseq = nameseq_find_subseq(nseq, lower);
- if (sseq) {
-
- /* Lower end overlaps existing entry => need an exact match */
- if ((sseq->lower != lower) || (sseq->upper != upper)) {
- return NULL;
- }
-
- info = sseq->info;
-
- /* Check if an identical publication already exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->ref == port) && (publ->key == key) &&
- (!publ->node || (publ->node == node)))
- return NULL;
- }
- } else {
- u32 inspos;
- struct sub_seq *freesseq;
-
- /* Find where lower end should be inserted */
- inspos = nameseq_locate_subseq(nseq, lower);
-
- /* Fail if upper end overlaps into an existing entry */
- if ((inspos < nseq->first_free) &&
- (upper >= nseq->sseqs[inspos].lower)) {
- return NULL;
- }
+ struct tipc_subscription *sub, *tmp;
+ struct service_range *sr;
+ struct publication *p;
+ bool first = false;
- /* Ensure there is space for new sub-sequence */
- if (nseq->first_free == nseq->alloc) {
- struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2);
+ sr = tipc_service_create_range(sc, lower, upper);
+ if (!sr)
+ goto err;
- if (!sseqs) {
- pr_warn("Cannot publish {%u,%u,%u}, no memory\n",
- type, lower, upper);
- return NULL;
- }
- memcpy(sseqs, nseq->sseqs,
- nseq->alloc * sizeof(struct sub_seq));
- kfree(nseq->sseqs);
- nseq->sseqs = sseqs;
- nseq->alloc *= 2;
- }
+ first = list_empty(&sr->all_publ);
- info = kzalloc(sizeof(*info), GFP_ATOMIC);
- if (!info) {
- pr_warn("Cannot publish {%u,%u,%u}, no memory\n",
- type, lower, upper);
+ /* Return if the publication already exists */
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ if (p->key == key && (!p->node || p->node == node))
return NULL;
- }
-
- INIT_LIST_HEAD(&info->node_list);
- INIT_LIST_HEAD(&info->cluster_list);
- INIT_LIST_HEAD(&info->zone_list);
-
- /* Insert new sub-sequence */
- sseq = &nseq->sseqs[inspos];
- freesseq = &nseq->sseqs[nseq->first_free];
- memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof(*sseq));
- memset(sseq, 0, sizeof(*sseq));
- nseq->first_free++;
- sseq->lower = lower;
- sseq->upper = upper;
- sseq->info = info;
- created_subseq = 1;
}
- /* Insert a publication */
- publ = publ_create(type, lower, upper, scope, node, port, key);
- if (!publ)
- return NULL;
-
- list_add(&publ->zone_list, &info->zone_list);
- info->zone_list_size++;
-
- if (in_own_cluster(net, node)) {
- list_add(&publ->cluster_list, &info->cluster_list);
- info->cluster_list_size++;
- }
-
- if (in_own_node(net, node)) {
- list_add(&publ->node_list, &info->node_list);
- info->node_list_size++;
- }
+ /* Create and insert publication */
+ p = tipc_publ_create(type, lower, upper, scope, node, port, key);
+ if (!p)
+ goto err;
+ if (in_own_node(net, node))
+ list_add(&p->local_publ, &sr->local_publ);
+ list_add(&p->all_publ, &sr->all_publ);
/* Any subscriptions waiting for notification? */
- list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
- TIPC_PUBLISHED, publ->ref,
- publ->node, publ->scope,
- created_subseq);
+ list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
+ tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_PUBLISHED,
+ p->port, p->node, p->scope, first);
}
- return publ;
+ return p;
+err:
+ pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, upper);
+ return NULL;
}
/**
- * tipc_nameseq_remove_publ
- *
- * NOTE: There may be cases where TIPC is asked to remove a publication
- * that is not in the name table. For example, if another node issues a
- * publication for a name sequence that overlaps an existing name sequence
- * the publication will not be recorded, which means the publication won't
- * be found when the name sequence is later withdrawn by that node.
- * A failed withdraw request simply returns a failure indication and lets the
- * caller issue any error or warning messages associated with such a problem.
+ * tipc_service_remove_publ - remove a publication from a service
*/
-static struct publication *tipc_nameseq_remove_publ(struct net *net,
- struct name_seq *nseq,
- u32 inst, u32 node,
- u32 ref, u32 key)
+static struct publication *tipc_service_remove_publ(struct net *net,
+ struct tipc_service *sc,
+ u32 lower, u32 upper,
+ u32 node, u32 key)
{
- struct publication *publ;
- struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
- struct name_info *info;
- struct sub_seq *free;
- struct tipc_subscription *s, *st;
- int removed_subseq = 0;
-
- if (!sseq)
- return NULL;
+ struct tipc_subscription *sub, *tmp;
+ struct service_range *sr;
+ struct publication *p;
+ bool found = false;
+ bool last = false;
+ struct rb_node *n;
- info = sseq->info;
+ sr = tipc_service_find_range(sc, lower);
+ if (!sr)
+ return NULL;
- /* Locate publication, if it exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->key == key) && (publ->ref == ref) &&
- (!publ->node || (publ->node == node)))
- goto found;
+ /* Find exact matching service range */
+ for (n = &sr->tree_node; n; n = rb_next(n)) {
+ sr = container_of(n, struct service_range, tree_node);
+ if (sr->upper == upper)
+ break;
}
- return NULL;
-
-found:
- /* Remove publication from zone scope list */
- list_del(&publ->zone_list);
- info->zone_list_size--;
+ if (!n || sr->lower != lower || sr->upper != upper)
+ return NULL;
- /* Remove publication from cluster scope list, if present */
- if (in_own_cluster(net, node)) {
- list_del(&publ->cluster_list);
- info->cluster_list_size--;
+ /* Find publication, if it exists */
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ if (p->key != key || (node && node != p->node))
+ continue;
+ found = true;
+ break;
}
+ if (!found)
+ return NULL;
- /* Remove publication from node scope list, if present */
- if (in_own_node(net, node)) {
- list_del(&publ->node_list);
- info->node_list_size--;
- }
+ list_del(&p->all_publ);
+ list_del(&p->local_publ);
- /* Contract subseq list if no more publications for that subseq */
- if (list_empty(&info->zone_list)) {
- kfree(info);
- free = &nseq->sseqs[nseq->first_free--];
- memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
- removed_subseq = 1;
+ /* Remove service range item if this was its last publication */
+ if (list_empty(&sr->all_publ)) {
+ last = true;
+ rb_erase(&sr->tree_node, &sc->ranges);
+ kfree(sr);
}
/* Notify any waiting subscriptions */
- list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
- TIPC_WITHDRAWN, publ->ref,
- publ->node, publ->scope,
- removed_subseq);
+ list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
+ tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_WITHDRAWN,
+ p->port, p->node, p->scope, last);
}
-
- return publ;
+ return p;
}
/**
- * tipc_nameseq_subscribe - attach a subscription, and optionally
- * issue the prescribed number of events if there is any sub-
- * sequence overlapping with the requested sequence
+ * tipc_service_subscribe - attach a subscription, and optionally
+ * issue the prescribed number of events if there is any service
+ * range overlapping with the requested range
*/
-static void tipc_nameseq_subscribe(struct name_seq *nseq,
- struct tipc_subscription *s,
- bool status)
+static void tipc_service_subscribe(struct tipc_service *service,
+ struct tipc_subscription *sub)
{
- struct sub_seq *sseq = nseq->sseqs;
+ struct tipc_subscr *sb = &sub->evt.s;
+ struct service_range *sr;
struct tipc_name_seq ns;
+ struct publication *p;
+ struct rb_node *n;
+ bool first;
- tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ ns.type = tipc_sub_read(sb, seq.type);
+ ns.lower = tipc_sub_read(sb, seq.lower);
+ ns.upper = tipc_sub_read(sb, seq.upper);
- tipc_subscrp_get(s);
- list_add(&s->nameseq_list, &nseq->subscriptions);
+ tipc_sub_get(sub);
+ list_add(&sub->service_list, &service->subscriptions);
- if (!status || !sseq)
+ if (tipc_sub_read(sb, filter) & TIPC_SUB_NO_STATUS)
return;
- while (sseq != &nseq->sseqs[nseq->first_free]) {
- if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
- struct publication *crs;
- struct name_info *info = sseq->info;
- int must_report = 1;
-
- list_for_each_entry(crs, &info->zone_list, zone_list) {
- tipc_subscrp_report_overlap(s, sseq->lower,
- sseq->upper,
- TIPC_PUBLISHED,
- crs->ref, crs->node,
- crs->scope,
- must_report);
- must_report = 0;
- }
+ for (n = rb_first(&service->ranges); n; n = rb_next(n)) {
+ sr = container_of(n, struct service_range, tree_node);
+ if (sr->lower > ns.upper)
+ break;
+ if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper))
+ continue;
+ first = true;
+
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ tipc_sub_report_overlap(sub, sr->lower, sr->upper,
+ TIPC_PUBLISHED, p->port,
+ p->node, p->scope, first);
+ first = false;
}
- sseq++;
}
}
-static struct name_seq *nametbl_find_seq(struct net *net, u32 type)
+static struct tipc_service *tipc_service_find(struct net *net, u32 type)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct hlist_head *seq_head;
- struct name_seq *ns;
-
- seq_head = &tn->nametbl->seq_hlist[hash(type)];
- hlist_for_each_entry_rcu(ns, seq_head, ns_list) {
- if (ns->type == type)
- return ns;
+ struct name_table *nt = tipc_name_table(net);
+ struct hlist_head *service_head;
+ struct tipc_service *service;
+
+ service_head = &nt->services[hash(type)];
+ hlist_for_each_entry_rcu(service, service_head, service_list) {
+ if (service->type == type)
+ return service;
}
-
return NULL;
};
struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
- u32 lower, u32 upper, u32 scope,
- u32 node, u32 port, u32 key)
+ u32 lower, u32 upper,
+ u32 scope, u32 node,
+ u32 port, u32 key)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct publication *publ;
- struct name_seq *seq = nametbl_find_seq(net, type);
- int index = hash(type);
-
- if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
- (lower > upper)) {
- pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_service *sc;
+ struct publication *p;
+
+ if (scope > TIPC_NODE_SCOPE || lower > upper) {
+ pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n",
type, lower, upper, scope);
return NULL;
}
-
- if (!seq)
- seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
- if (!seq)
+ sc = tipc_service_find(net, type);
+ if (!sc)
+ sc = tipc_service_create(type, &nt->services[hash(type)]);
+ if (!sc)
return NULL;
- spin_lock_bh(&seq->lock);
- publ = tipc_nameseq_insert_publ(net, seq, type, lower, upper,
- scope, node, port, key);
- spin_unlock_bh(&seq->lock);
- return publ;
+ spin_lock_bh(&sc->lock);
+ p = tipc_service_insert_publ(net, sc, type, lower, upper,
+ scope, node, port, key);
+ spin_unlock_bh(&sc->lock);
+ return p;
}
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
- u32 lower, u32 node, u32 ref,
- u32 key)
+ u32 lower, u32 upper,
+ u32 node, u32 key)
{
- struct publication *publ;
- struct name_seq *seq = nametbl_find_seq(net, type);
+ struct tipc_service *sc = tipc_service_find(net, type);
+ struct publication *p = NULL;
- if (!seq)
+ if (!sc)
return NULL;
- spin_lock_bh(&seq->lock);
- publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key);
- if (!seq->first_free && list_empty(&seq->subscriptions)) {
- hlist_del_init_rcu(&seq->ns_list);
- kfree(seq->sseqs);
- spin_unlock_bh(&seq->lock);
- kfree_rcu(seq, rcu);
- return publ;
+ spin_lock_bh(&sc->lock);
+ p = tipc_service_remove_publ(net, sc, lower, upper, node, key);
+
+ /* Delete service item if this no more publications and subscriptions */
+ if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
+ hlist_del_init_rcu(&sc->service_list);
+ kfree_rcu(sc, rcu);
}
- spin_unlock_bh(&seq->lock);
- return publ;
+ spin_unlock_bh(&sc->lock);
+ return p;
}
/**
- * tipc_nametbl_translate - perform name translation
+ * tipc_nametbl_translate - perform service instance to socket translation
*
- * On entry, 'destnode' is the search domain used during translation.
+ * On entry, 'dnode' is the search domain used during translation.
*
* On exit:
- * - if name translation is deferred to another node/cluster/zone,
- * leaves 'destnode' unchanged (will be non-zero) and returns 0
- * - if name translation is attempted and succeeds, sets 'destnode'
- * to publishing node and returns port reference (will be non-zero)
- * - if name translation is attempted and fails, sets 'destnode' to 0
- * and returns 0
+ * - if translation is deferred to another node, leave 'dnode' unchanged and
+ * return 0
+ * - if translation is attempted and succeeds, set 'dnode' to the publishing
+ * node and return the published (non-zero) port number
+ * - if translation is attempted and fails, set 'dnode' to 0 and return 0
+ *
+ * Note that for legacy users (node configured with Z.C.N address format) the
+ * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0
+ * we must look in the local binding list first
*/
-u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
- u32 *destnode)
+u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sub_seq *sseq;
- struct name_info *info;
- struct publication *publ;
- struct name_seq *seq;
- u32 ref = 0;
+ struct tipc_net *tn = tipc_net(net);
+ bool legacy = tn->legacy_addr_format;
+ u32 self = tipc_own_addr(net);
+ struct service_range *sr;
+ struct tipc_service *sc;
+ struct list_head *list;
+ struct publication *p;
+ u32 port = 0;
u32 node = 0;
- if (!tipc_in_scope(*destnode, tn->own_addr))
+ if (!tipc_in_scope(legacy, *dnode, self))
return 0;
rcu_read_lock();
- seq = nametbl_find_seq(net, type);
- if (unlikely(!seq))
+ sc = tipc_service_find(net, type);
+ if (unlikely(!sc))
goto not_found;
- spin_lock_bh(&seq->lock);
- sseq = nameseq_find_subseq(seq, instance);
- if (unlikely(!sseq))
+
+ spin_lock_bh(&sc->lock);
+ sr = tipc_service_find_range(sc, instance);
+ if (unlikely(!sr))
goto no_match;
- info = sseq->info;
-
- /* Closest-First Algorithm */
- if (likely(!*destnode)) {
- if (!list_empty(&info->node_list)) {
- publ = list_first_entry(&info->node_list,
- struct publication,
- node_list);
- list_move_tail(&publ->node_list,
- &info->node_list);
- } else if (!list_empty(&info->cluster_list)) {
- publ = list_first_entry(&info->cluster_list,
- struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list,
- &info->cluster_list);
- } else {
- publ = list_first_entry(&info->zone_list,
- struct publication,
- zone_list);
- list_move_tail(&publ->zone_list,
- &info->zone_list);
- }
- }
- /* Round-Robin Algorithm */
- else if (*destnode == tn->own_addr) {
- if (list_empty(&info->node_list))
- goto no_match;
- publ = list_first_entry(&info->node_list, struct publication,
- node_list);
- list_move_tail(&publ->node_list, &info->node_list);
- } else if (in_own_cluster_exact(net, *destnode)) {
- if (list_empty(&info->cluster_list))
+ /* Select lookup algorithm: local, closest-first or round-robin */
+ if (*dnode == self) {
+ list = &sr->local_publ;
+ if (list_empty(list))
goto no_match;
- publ = list_first_entry(&info->cluster_list, struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list, &info->cluster_list);
+ p = list_first_entry(list, struct publication, local_publ);
+ list_move_tail(&p->local_publ, &sr->local_publ);
+ } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
+ list = &sr->local_publ;
+ p = list_first_entry(list, struct publication, local_publ);
+ list_move_tail(&p->local_publ, &sr->local_publ);
} else {
- publ = list_first_entry(&info->zone_list, struct publication,
- zone_list);
- list_move_tail(&publ->zone_list, &info->zone_list);
+ list = &sr->all_publ;
+ p = list_first_entry(list, struct publication, all_publ);
+ list_move_tail(&p->all_publ, &sr->all_publ);
}
-
- ref = publ->ref;
- node = publ->node;
+ port = p->port;
+ node = p->node;
no_match:
- spin_unlock_bh(&seq->lock);
+ spin_unlock_bh(&sc->lock);
not_found:
rcu_read_unlock();
- *destnode = node;
- return ref;
+ *dnode = node;
+ return port;
}
bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
@@ -606,102 +468,102 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
bool all)
{
u32 self = tipc_own_addr(net);
- struct publication *publ;
- struct name_info *info;
- struct name_seq *seq;
- struct sub_seq *sseq;
+ struct service_range *sr;
+ struct tipc_service *sc;
+ struct publication *p;
*dstcnt = 0;
rcu_read_lock();
- seq = nametbl_find_seq(net, type);
- if (unlikely(!seq))
+ sc = tipc_service_find(net, type);
+ if (unlikely(!sc))
goto exit;
- spin_lock_bh(&seq->lock);
- sseq = nameseq_find_subseq(seq, instance);
- if (likely(sseq)) {
- info = sseq->info;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if (publ->scope != scope)
- continue;
- if (publ->ref == exclude && publ->node == self)
- continue;
- tipc_dest_push(dsts, publ->node, publ->ref);
- (*dstcnt)++;
- if (all)
- continue;
- list_move_tail(&publ->zone_list, &info->zone_list);
- break;
- }
+
+ spin_lock_bh(&sc->lock);
+
+ sr = tipc_service_find_range(sc, instance);
+ if (!sr)
+ goto no_match;
+
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ if (p->scope != scope)
+ continue;
+ if (p->port == exclude && p->node == self)
+ continue;
+ tipc_dest_push(dsts, p->node, p->port);
+ (*dstcnt)++;
+ if (all)
+ continue;
+ list_move_tail(&p->all_publ, &sr->all_publ);
+ break;
}
- spin_unlock_bh(&seq->lock);
+no_match:
+ spin_unlock_bh(&sc->lock);
exit:
rcu_read_unlock();
return !list_empty(dsts);
}
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports)
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports)
{
- struct sub_seq *sseq_stop;
- struct name_info *info;
+ struct service_range *sr;
+ struct tipc_service *sc;
struct publication *p;
- struct name_seq *seq;
- struct sub_seq *sseq;
- int res = 0;
+ struct rb_node *n;
rcu_read_lock();
- seq = nametbl_find_seq(net, type);
- if (!seq)
+ sc = tipc_service_find(net, type);
+ if (!sc)
goto exit;
- spin_lock_bh(&seq->lock);
- sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
- sseq_stop = seq->sseqs + seq->first_free;
- for (; sseq != sseq_stop; sseq++) {
- if (sseq->lower > upper)
+ spin_lock_bh(&sc->lock);
+
+ for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+ sr = container_of(n, struct service_range, tree_node);
+ if (sr->upper < lower)
+ continue;
+ if (sr->lower > upper)
break;
- info = sseq->info;
- list_for_each_entry(p, &info->node_list, node_list) {
+ list_for_each_entry(p, &sr->local_publ, local_publ) {
if (p->scope == scope || (!exact && p->scope < scope))
- tipc_dest_push(dports, 0, p->ref);
+ tipc_dest_push(dports, 0, p->port);
}
-
- if (info->cluster_list_size != info->node_list_size)
- res = 1;
}
- spin_unlock_bh(&seq->lock);
+ spin_unlock_bh(&sc->lock);
exit:
rcu_read_unlock();
- return res;
}
/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
* - Creates list of nodes that overlap the given multicast address
- * - Determines if any node local ports overlap
+ * - Determines if any node local destinations overlap
*/
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
u32 upper, struct tipc_nlist *nodes)
{
- struct sub_seq *sseq, *stop;
- struct publication *publ;
- struct name_info *info;
- struct name_seq *seq;
+ struct service_range *sr;
+ struct tipc_service *sc;
+ struct publication *p;
+ struct rb_node *n;
rcu_read_lock();
- seq = nametbl_find_seq(net, type);
- if (!seq)
+ sc = tipc_service_find(net, type);
+ if (!sc)
goto exit;
- spin_lock_bh(&seq->lock);
- sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
- stop = seq->sseqs + seq->first_free;
- for (; sseq != stop && sseq->lower <= upper; sseq++) {
- info = sseq->info;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- tipc_nlist_add(nodes, publ->node);
+ spin_lock_bh(&sc->lock);
+
+ for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+ sr = container_of(n, struct service_range, tree_node);
+ if (sr->upper < lower)
+ continue;
+ if (sr->lower > upper)
+ break;
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ tipc_nlist_add(nodes, p->node);
}
}
- spin_unlock_bh(&seq->lock);
+ spin_unlock_bh(&sc->lock);
exit:
rcu_read_unlock();
}
@@ -711,90 +573,85 @@ exit:
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
u32 type, u32 scope)
{
- struct sub_seq *sseq, *stop;
- struct name_info *info;
+ struct service_range *sr;
+ struct tipc_service *sc;
struct publication *p;
- struct name_seq *seq;
+ struct rb_node *n;
rcu_read_lock();
- seq = nametbl_find_seq(net, type);
- if (!seq)
+ sc = tipc_service_find(net, type);
+ if (!sc)
goto exit;
- spin_lock_bh(&seq->lock);
- sseq = seq->sseqs;
- stop = seq->sseqs + seq->first_free;
- for (; sseq != stop; sseq++) {
- info = sseq->info;
- list_for_each_entry(p, &info->zone_list, zone_list) {
+ spin_lock_bh(&sc->lock);
+ for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+ sr = container_of(n, struct service_range, tree_node);
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
if (p->scope != scope)
continue;
- tipc_group_add_member(grp, p->node, p->ref, p->lower);
+ tipc_group_add_member(grp, p->node, p->port, p->lower);
}
}
- spin_unlock_bh(&seq->lock);
+ spin_unlock_bh(&sc->lock);
exit:
rcu_read_unlock();
}
-/*
- * tipc_nametbl_publish - add name publication to network name tables
+/* tipc_nametbl_publish - add service binding to name table
*/
struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
- u32 upper, u32 scope, u32 port_ref,
+ u32 upper, u32 scope, u32 port,
u32 key)
{
- struct publication *publ;
- struct sk_buff *buf = NULL;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+ struct publication *p = NULL;
+ struct sk_buff *skb = NULL;
spin_lock_bh(&tn->nametbl_lock);
- if (tn->nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) {
- pr_warn("Publication failed, local publication limit reached (%u)\n",
- TIPC_MAX_PUBLICATIONS);
- spin_unlock_bh(&tn->nametbl_lock);
- return NULL;
+
+ if (nt->local_publ_count >= TIPC_MAX_PUBL) {
+ pr_warn("Bind failed, max limit %u reached\n", TIPC_MAX_PUBL);
+ goto exit;
}
- publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
- tn->own_addr, port_ref, key);
- if (likely(publ)) {
- tn->nametbl->local_publ_count++;
- buf = tipc_named_publish(net, publ);
- /* Any pending external events? */
- tipc_named_process_backlog(net);
+ p = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
+ tipc_own_addr(net), port, key);
+ if (p) {
+ nt->local_publ_count++;
+ skb = tipc_named_publish(net, p);
}
+exit:
spin_unlock_bh(&tn->nametbl_lock);
- if (buf)
- tipc_node_broadcast(net, buf);
- return publ;
+ if (skb)
+ tipc_node_broadcast(net, skb);
+ return p;
}
/**
- * tipc_nametbl_withdraw - withdraw name publication from network name tables
+ * tipc_nametbl_withdraw - withdraw a service binding
*/
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
- u32 key)
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
+ u32 upper, u32 key)
{
- struct publication *publ;
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+ u32 self = tipc_own_addr(net);
struct sk_buff *skb = NULL;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct publication *p;
spin_lock_bh(&tn->nametbl_lock);
- publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr,
- ref, key);
- if (likely(publ)) {
- tn->nametbl->local_publ_count--;
- skb = tipc_named_withdraw(net, publ);
- /* Any pending external events? */
- tipc_named_process_backlog(net);
- list_del_init(&publ->pport_list);
- kfree_rcu(publ, rcu);
+
+ p = tipc_nametbl_remove_publ(net, type, lower, upper, self, key);
+ if (p) {
+ nt->local_publ_count--;
+ skb = tipc_named_withdraw(net, p);
+ list_del_init(&p->binding_sock);
+ kfree_rcu(p, rcu);
} else {
- pr_err("Unable to remove local publication\n"
- "(type=%u, lower=%u, ref=%u, key=%u)\n",
- type, lower, ref, key);
+ pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
+ type, lower, upper, key);
}
spin_unlock_bh(&tn->nametbl_lock);
@@ -808,26 +665,26 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
+void tipc_nametbl_subscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(s->net, tipc_net_id);
- u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
- int index = hash(type);
- struct name_seq *seq;
- struct tipc_name_seq ns;
+ struct name_table *nt = tipc_name_table(sub->net);
+ struct tipc_net *tn = tipc_net(sub->net);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 type = tipc_sub_read(s, seq.type);
+ struct tipc_service *sc;
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, type);
- if (!seq)
- seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
- if (seq) {
- spin_lock_bh(&seq->lock);
- tipc_nameseq_subscribe(seq, s, status);
- spin_unlock_bh(&seq->lock);
+ sc = tipc_service_find(sub->net, type);
+ if (!sc)
+ sc = tipc_service_create(type, &nt->services[hash(type)]);
+ if (sc) {
+ spin_lock_bh(&sc->lock);
+ tipc_service_subscribe(sc, sub);
+ spin_unlock_bh(&sc->lock);
} else {
- tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
- pr_warn("Failed to create subscription for {%u,%u,%u}\n",
- ns.type, ns.lower, ns.upper);
+ pr_warn("Failed to subscribe for {%u,%u,%u}\n", type,
+ tipc_sub_read(s, seq.lower),
+ tipc_sub_read(s, seq.upper));
}
spin_unlock_bh(&tn->nametbl_lock);
}
@@ -835,126 +692,124 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
/**
* tipc_nametbl_unsubscribe - remove a subscription object from name table
*/
-void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
+void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(s->net, tipc_net_id);
- struct name_seq *seq;
- u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+ struct tipc_net *tn = tipc_net(sub->net);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 type = tipc_sub_read(s, seq.type);
+ struct tipc_service *sc;
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, type);
- if (seq != NULL) {
- spin_lock_bh(&seq->lock);
- list_del_init(&s->nameseq_list);
- tipc_subscrp_put(s);
- if (!seq->first_free && list_empty(&seq->subscriptions)) {
- hlist_del_init_rcu(&seq->ns_list);
- kfree(seq->sseqs);
- spin_unlock_bh(&seq->lock);
- kfree_rcu(seq, rcu);
- } else {
- spin_unlock_bh(&seq->lock);
- }
+ sc = tipc_service_find(sub->net, type);
+ if (!sc)
+ goto exit;
+
+ spin_lock_bh(&sc->lock);
+ list_del_init(&sub->service_list);
+ tipc_sub_put(sub);
+
+ /* Delete service item if no more publications and subscriptions */
+ if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
+ hlist_del_init_rcu(&sc->service_list);
+ kfree_rcu(sc, rcu);
}
+ spin_unlock_bh(&sc->lock);
+exit:
spin_unlock_bh(&tn->nametbl_lock);
}
int tipc_nametbl_init(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct name_table *tipc_nametbl;
+ struct tipc_net *tn = tipc_net(net);
+ struct name_table *nt;
int i;
- tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC);
- if (!tipc_nametbl)
+ nt = kzalloc(sizeof(*nt), GFP_ATOMIC);
+ if (!nt)
return -ENOMEM;
for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
- INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
+ INIT_HLIST_HEAD(&nt->services[i]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
- tn->nametbl = tipc_nametbl;
+ INIT_LIST_HEAD(&nt->node_scope);
+ INIT_LIST_HEAD(&nt->cluster_scope);
+ tn->nametbl = nt;
spin_lock_init(&tn->nametbl_lock);
return 0;
}
/**
- * tipc_purge_publications - remove all publications for a given type
- *
- * tipc_nametbl_lock must be held when calling this function
+ * tipc_service_delete - purge all publications for a service and delete it
*/
-static void tipc_purge_publications(struct net *net, struct name_seq *seq)
+static void tipc_service_delete(struct net *net, struct tipc_service *sc)
{
- struct publication *publ, *safe;
- struct sub_seq *sseq;
- struct name_info *info;
-
- spin_lock_bh(&seq->lock);
- sseq = seq->sseqs;
- info = sseq->info;
- list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
- tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
- publ->ref, publ->key);
- kfree_rcu(publ, rcu);
+ struct service_range *sr, *tmpr;
+ struct publication *p, *tmpb;
+
+ spin_lock_bh(&sc->lock);
+ rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
+ list_for_each_entry_safe(p, tmpb,
+ &sr->all_publ, all_publ) {
+ tipc_service_remove_publ(net, sc, p->lower, p->upper,
+ p->node, p->key);
+ kfree_rcu(p, rcu);
+ }
}
- hlist_del_init_rcu(&seq->ns_list);
- kfree(seq->sseqs);
- spin_unlock_bh(&seq->lock);
-
- kfree_rcu(seq, rcu);
+ hlist_del_init_rcu(&sc->service_list);
+ spin_unlock_bh(&sc->lock);
+ kfree_rcu(sc, rcu);
}
void tipc_nametbl_stop(struct net *net)
{
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+ struct hlist_head *service_head;
+ struct tipc_service *service;
u32 i;
- struct name_seq *seq;
- struct hlist_head *seq_head;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct name_table *tipc_nametbl = tn->nametbl;
/* Verify name table is empty and purge any lingering
* publications, then release the name table
*/
spin_lock_bh(&tn->nametbl_lock);
for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
- if (hlist_empty(&tipc_nametbl->seq_hlist[i]))
+ if (hlist_empty(&nt->services[i]))
continue;
- seq_head = &tipc_nametbl->seq_hlist[i];
- hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
- tipc_purge_publications(net, seq);
+ service_head = &nt->services[i];
+ hlist_for_each_entry_rcu(service, service_head, service_list) {
+ tipc_service_delete(net, service);
}
}
spin_unlock_bh(&tn->nametbl_lock);
synchronize_net();
- kfree(tipc_nametbl);
-
+ kfree(nt);
}
static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
- struct name_seq *seq,
- struct sub_seq *sseq, u32 *last_publ)
+ struct tipc_service *service,
+ struct service_range *sr,
+ u32 *last_key)
{
- void *hdr;
- struct nlattr *attrs;
- struct nlattr *publ;
struct publication *p;
+ struct nlattr *attrs;
+ struct nlattr *b;
+ void *hdr;
- if (*last_publ) {
- list_for_each_entry(p, &sseq->info->zone_list, zone_list)
- if (p->key == *last_publ)
+ if (*last_key) {
+ list_for_each_entry(p, &sr->all_publ, all_publ)
+ if (p->key == *last_key)
break;
- if (p->key != *last_publ)
+ if (p->key != *last_key)
return -EPIPE;
} else {
- p = list_first_entry(&sseq->info->zone_list, struct publication,
- zone_list);
+ p = list_first_entry(&sr->all_publ,
+ struct publication,
+ all_publ);
}
- list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) {
- *last_publ = p->key;
+ list_for_each_entry_from(p, &sr->all_publ, all_publ) {
+ *last_key = p->key;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
&tipc_genl_family, NLM_F_MULTI,
@@ -966,35 +821,35 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
if (!attrs)
goto msg_full;
- publ = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL);
- if (!publ)
+ b = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL);
+ if (!b)
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, seq->type))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, service->type))
goto publ_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sseq->lower))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sr->lower))
goto publ_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sseq->upper))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sr->upper))
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope))
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
goto publ_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
goto publ_msg_full;
- nla_nest_end(msg->skb, publ);
+ nla_nest_end(msg->skb, b);
nla_nest_end(msg->skb, attrs);
genlmsg_end(msg->skb, hdr);
}
- *last_publ = 0;
+ *last_key = 0;
return 0;
publ_msg_full:
- nla_nest_cancel(msg->skb, publ);
+ nla_nest_cancel(msg->skb, b);
attr_msg_full:
nla_nest_cancel(msg->skb, attrs);
msg_full:
@@ -1003,39 +858,34 @@ msg_full:
return -EMSGSIZE;
}
-static int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq,
- u32 *last_lower, u32 *last_publ)
+static int __tipc_nl_service_range_list(struct tipc_nl_msg *msg,
+ struct tipc_service *sc,
+ u32 *last_lower, u32 *last_key)
{
- struct sub_seq *sseq;
- struct sub_seq *sseq_start;
+ struct service_range *sr;
+ struct rb_node *n;
int err;
- if (*last_lower) {
- sseq_start = nameseq_find_subseq(seq, *last_lower);
- if (!sseq_start)
- return -EPIPE;
- } else {
- sseq_start = seq->sseqs;
- }
-
- for (sseq = sseq_start; sseq != &seq->sseqs[seq->first_free]; sseq++) {
- err = __tipc_nl_add_nametable_publ(msg, seq, sseq, last_publ);
+ for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+ sr = container_of(n, struct service_range, tree_node);
+ if (sr->lower < *last_lower)
+ continue;
+ err = __tipc_nl_add_nametable_publ(msg, sc, sr, last_key);
if (err) {
- *last_lower = sseq->lower;
+ *last_lower = sr->lower;
return err;
}
}
*last_lower = 0;
-
return 0;
}
-static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg,
- u32 *last_type, u32 *last_lower, u32 *last_publ)
+static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
+ u32 *last_type, u32 *last_lower, u32 *last_key)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct hlist_head *seq_head;
- struct name_seq *seq = NULL;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_service *service = NULL;
+ struct hlist_head *head;
int err;
int i;
@@ -1045,30 +895,31 @@ static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg,
i = 0;
for (; i < TIPC_NAMETBL_SIZE; i++) {
- seq_head = &tn->nametbl->seq_hlist[i];
+ head = &tn->nametbl->services[i];
if (*last_type) {
- seq = nametbl_find_seq(net, *last_type);
- if (!seq)
+ service = tipc_service_find(net, *last_type);
+ if (!service)
return -EPIPE;
} else {
- hlist_for_each_entry_rcu(seq, seq_head, ns_list)
+ hlist_for_each_entry_rcu(service, head, service_list)
break;
- if (!seq)
+ if (!service)
continue;
}
- hlist_for_each_entry_from_rcu(seq, ns_list) {
- spin_lock_bh(&seq->lock);
- err = __tipc_nl_subseq_list(msg, seq, last_lower,
- last_publ);
+ hlist_for_each_entry_from_rcu(service, service_list) {
+ spin_lock_bh(&service->lock);
+ err = __tipc_nl_service_range_list(msg, service,
+ last_lower,
+ last_key);
if (err) {
- *last_type = seq->type;
- spin_unlock_bh(&seq->lock);
+ *last_type = service->type;
+ spin_unlock_bh(&service->lock);
return err;
}
- spin_unlock_bh(&seq->lock);
+ spin_unlock_bh(&service->lock);
}
*last_type = 0;
}
@@ -1077,13 +928,13 @@ static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg,
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- int err;
- int done = cb->args[3];
+ struct net *net = sock_net(skb->sk);
u32 last_type = cb->args[0];
u32 last_lower = cb->args[1];
- u32 last_publ = cb->args[2];
- struct net *net = sock_net(skb->sk);
+ u32 last_key = cb->args[2];
+ int done = cb->args[3];
struct tipc_nl_msg msg;
+ int err;
if (done)
return 0;
@@ -1093,7 +944,8 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
msg.seq = cb->nlh->nlmsg_seq;
rcu_read_lock();
- err = tipc_nl_seq_list(net, &msg, &last_type, &last_lower, &last_publ);
+ err = tipc_nl_service_list(net, &msg, &last_type,
+ &last_lower, &last_key);
if (!err) {
done = 1;
} else if (err != -EMSGSIZE) {
@@ -1109,7 +961,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0] = last_type;
cb->args[1] = last_lower;
- cb->args[2] = last_publ;
+ cb->args[2] = last_key;
cb->args[3] = done;
return skb->len;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index f56e7cb3d436..4b14fc28d9e2 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -1,7 +1,7 @@
/*
* net/tipc/name_table.h: Include file for TIPC name table code
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -54,19 +54,22 @@ struct tipc_group;
* @type: name sequence type
* @lower: name sequence lower bound
* @upper: name sequence upper bound
- * @scope: scope of publication
- * @node: network address of publishing port's node
- * @ref: publishing port
- * @key: publication key
- * @nodesub_list: subscription to "node down" event (off-node publication only)
- * @local_list: adjacent entries in list of publications made by this node
- * @pport_list: adjacent entries in list of publications made by this port
- * @node_list: adjacent matching name seq publications with >= node scope
- * @cluster_list: adjacent matching name seq publications with >= cluster scope
- * @zone_list: adjacent matching name seq publications with >= zone scope
+ * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
+ * @node: network address of publishing socket's node
+ * @port: publishing port
+ * @key: publication key, unique across the cluster
+ * @binding_node: all publications from the same node which bound this one
+ * - Remote publications: in node->publ_list
+ * Used by node/name distr to withdraw publications when node is lost
+ * - Local/node scope publications: in name_table->node_scope list
+ * - Local/cluster scope publications: in name_table->cluster_scope list
+ * @binding_sock: all publications from the same socket which bound this one
+ * Used by socket to withdraw publications when socket is unbound/released
+ * @local_publ: list of identical publications made from this node
+ * Used by closest_first and multicast receive lookup algorithms
+ * @all_publ: all publications identical to this one, whatever node and scope
+ * Used by round-robin lookup algorithm
* @rcu: RCU callback head used for deferred freeing
- *
- * Note that the node list, cluster list, and zone list are circular lists.
*/
struct publication {
u32 type;
@@ -74,34 +77,37 @@ struct publication {
u32 upper;
u32 scope;
u32 node;
- u32 ref;
+ u32 port;
u32 key;
- struct list_head nodesub_list;
- struct list_head local_list;
- struct list_head pport_list;
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
+ struct list_head binding_node;
+ struct list_head binding_sock;
+ struct list_head local_publ;
+ struct list_head all_publ;
struct rcu_head rcu;
};
/**
* struct name_table - table containing all existing port name publications
* @seq_hlist: name sequence hash lists
- * @publ_list: pulication lists
+ * @node_scope: all local publications with node scope
+ * - used by name_distr during re-init of name table
+ * @cluster_scope: all local publications with cluster scope
+ * - used by name_distr to send bulk updates to new nodes
+ * - used by name_distr during re-init of name table
* @local_publ_count: number of publications issued by this node
*/
struct name_table {
- struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
- struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+ struct hlist_head services[TIPC_NAMETBL_SIZE];
+ struct list_head node_scope;
+ struct list_head cluster_scope;
u32 local_publ_count;
};
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports);
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports);
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
@@ -110,17 +116,17 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
struct list_head *dsts, int *dstcnt, u32 exclude,
bool all);
struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
- u32 upper, u32 scope, u32 port_ref,
+ u32 upper, u32 scope, u32 port,
u32 key);
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper,
u32 key);
struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
u32 lower, u32 upper, u32 scope,
u32 node, u32 ref, u32 key);
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
- u32 lower, u32 node, u32 ref,
- u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
+ u32 lower, u32 upper,
+ u32 node, u32 key);
+void tipc_nametbl_subscribe(struct tipc_subscription *s);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 1a2fde0d6f61..856f9e97ea29 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -104,38 +104,39 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
-int tipc_net_start(struct net *net, u32 addr)
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- char addr_string[16];
+ if (tipc_own_id(net)) {
+ pr_info("Cannot configure node identity twice\n");
+ return -1;
+ }
+ pr_info("Started in network mode\n");
- tn->own_addr = addr;
+ if (node_id)
+ tipc_set_node_id(net, node_id);
+ if (addr)
+ tipc_net_finalize(net, addr);
+ return 0;
+}
- /* Ensure that the new address is visible before we reinit. */
+void tipc_net_finalize(struct net *net, u32 addr)
+{
+ tipc_set_node_addr(net, addr);
smp_mb();
-
tipc_named_reinit(net);
tipc_sk_reinit(net);
-
- tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
- TIPC_ZONE_SCOPE, 0, tn->own_addr);
-
- pr_info("Started in network mode\n");
- pr_info("Own node address %s, network identity %u\n",
- tipc_addr_string_fill(addr_string, tn->own_addr),
- tn->net_id);
- return 0;
+ tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+ TIPC_CLUSTER_SCOPE, 0, addr);
}
void tipc_net_stop(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ u32 self = tipc_own_addr(net);
- if (!tn->own_addr)
+ if (!self)
return;
- tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0,
- tn->own_addr);
+ tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self);
rtnl_lock();
tipc_bearer_stop(net);
tipc_node_stop(net);
@@ -147,8 +148,10 @@ void tipc_net_stop(struct net *net)
static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- void *hdr;
+ u64 *w0 = (u64 *)&tn->node_id[0];
+ u64 *w1 = (u64 *)&tn->node_id[8];
struct nlattr *attrs;
+ void *hdr;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
NLM_F_MULTI, TIPC_NL_NET_GET);
@@ -161,7 +164,10 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg)
if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id))
goto attr_msg_full;
-
+ if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID, *w0, 0))
+ goto attr_msg_full;
+ if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID_W1, *w1, 0))
+ goto attr_msg_full;
nla_nest_end(msg->skb, attrs);
genlmsg_end(msg->skb, hdr);
@@ -202,9 +208,9 @@ out:
int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
{
- struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
int err;
if (!info->attrs[TIPC_NLA_NET])
@@ -213,16 +219,17 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
info->extack);
+
if (err)
return err;
+ /* Can't change net id once TIPC has joined a network */
+ if (tipc_own_addr(net))
+ return -EPERM;
+
if (attrs[TIPC_NLA_NET_ID]) {
u32 val;
- /* Can't change net id once TIPC has joined a network */
- if (tn->own_addr)
- return -EPERM;
-
val = nla_get_u32(attrs[TIPC_NLA_NET_ID]);
if (val < 1 || val > 9999)
return -EINVAL;
@@ -233,17 +240,22 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
if (attrs[TIPC_NLA_NET_ADDR]) {
u32 addr;
- /* Can't change net addr once TIPC has joined a network */
- if (tn->own_addr)
- return -EPERM;
-
addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
- if (!tipc_addr_node_valid(addr))
+ if (!addr)
return -EINVAL;
-
- tipc_net_start(net, addr);
+ tn->legacy_addr_format = true;
+ tipc_net_init(net, NULL, addr);
}
+ if (attrs[TIPC_NLA_NET_NODEID]) {
+ u8 node_id[NODE_ID_LEN];
+ u64 *w0 = (u64 *)&node_id[0];
+ u64 *w1 = (u64 *)&node_id[8];
+
+ *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
+ *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
+ tipc_net_init(net, node_id, 0);
+ }
return 0;
}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index c0306aa2374b..09ad02b50bb1 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -41,10 +41,9 @@
extern const struct nla_policy tipc_nl_net_policy[];
-int tipc_net_start(struct net *net, u32 addr);
-
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
+void tipc_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
-
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9036d8756e73..c77dd2f3c589 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -115,6 +115,7 @@ struct tipc_node {
u16 capabilities;
u32 signature;
u32 link_id;
+ u8 peer_id[16];
struct list_head publ_list;
struct list_head conn_sks;
unsigned long keepalive_intv;
@@ -156,6 +157,7 @@ static void tipc_node_delete(struct tipc_node *node);
static void tipc_node_timeout(struct timer_list *t);
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
static void tipc_node_put(struct tipc_node *node);
static bool node_is_up(struct tipc_node *n);
@@ -233,9 +235,6 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
struct tipc_node *node;
unsigned int thash = tipc_hashfn(addr);
- if (unlikely(!in_own_cluster_exact(net, addr)))
- return NULL;
-
rcu_read_lock();
hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
if (node->addr != addr)
@@ -248,6 +247,30 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
return node;
}
+/* tipc_node_find_by_id - locate specified node object by its 128-bit id
+ * Note: this function is called only when a discovery request failed
+ * to find the node by its 32-bit id, and is not time critical
+ */
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n;
+ bool found = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ read_lock_bh(&n->lock);
+ if (!memcmp(id, n->peer_id, 16) &&
+ kref_get_unless_zero(&n->kref))
+ found = true;
+ read_unlock_bh(&n->lock);
+ if (found)
+ break;
+ }
+ rcu_read_unlock();
+ return found ? n : NULL;
+}
+
static void tipc_node_read_lock(struct tipc_node *n)
{
read_lock_bh(&n->lock);
@@ -301,16 +324,17 @@ static void tipc_node_write_unlock(struct tipc_node *n)
if (flags & TIPC_NOTIFY_LINK_UP) {
tipc_mon_peer_up(net, addr, bearer_id);
tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
- TIPC_NODE_SCOPE, link_id, addr);
+ TIPC_NODE_SCOPE, link_id, link_id);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
tipc_mon_peer_down(net, addr, bearer_id);
tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
- link_id, addr);
+ addr, link_id);
}
}
-struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
+static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
+ u8 *peer_id, u16 capabilities)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *n, *temp_node;
@@ -329,6 +353,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
goto exit;
}
n->addr = addr;
+ memcpy(&n->peer_id, peer_id, 16);
n->net = net;
n->capabilities = capabilities;
kref_init(&n->kref);
@@ -347,8 +372,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr,
- U16_MAX,
+ if (!tipc_link_bc_create(net, tipc_own_addr(net),
+ addr, U16_MAX,
tipc_link_window(tipc_bc_sndlink(net)),
n->capabilities,
&n->bc_entry.inputq1,
@@ -738,8 +763,51 @@ bool tipc_node_is_up(struct net *net, u32 addr)
return retval;
}
-void tipc_node_check_dest(struct net *net, u32 onode,
- struct tipc_bearer *b,
+static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+
+ addr ^= tipc_net(net)->random;
+ while ((n = tipc_node_find(net, addr))) {
+ tipc_node_put(n);
+ addr++;
+ }
+ return addr;
+}
+
+/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ */
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n;
+
+ /* Suggest new address if some other peer is using this one */
+ n = tipc_node_find(net, addr);
+ if (n) {
+ if (!memcmp(n->peer_id, id, NODE_ID_LEN))
+ addr = 0;
+ tipc_node_put(n);
+ if (!addr)
+ return 0;
+ return tipc_node_suggest_addr(net, addr);
+ }
+
+ /* Suggest previously used address if peer is known */
+ n = tipc_node_find_by_id(net, id);
+ if (n) {
+ addr = n->addr;
+ tipc_node_put(n);
+ }
+ /* Even this node may be in trial phase */
+ if (tn->trial_addr == addr)
+ return tipc_node_suggest_addr(net, addr);
+
+ return addr;
+}
+
+void tipc_node_check_dest(struct net *net, u32 addr,
+ u8 *peer_id, struct tipc_bearer *b,
u16 capabilities, u32 signature,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr)
@@ -758,7 +826,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
*dupl_addr = false;
*respond = false;
- n = tipc_node_create(net, onode, capabilities);
+ n = tipc_node_create(net, addr, peer_id, capabilities);
if (!n)
return;
@@ -836,15 +904,14 @@ void tipc_node_check_dest(struct net *net, u32 onode,
/* Now create new link if not already existing */
if (!l) {
- if (n->link_cnt == 2) {
- pr_warn("Cannot establish 3rd link to %x\n", n->addr);
+ if (n->link_cnt == 2)
goto exit;
- }
+
if_name = strchr(b->name, ':') + 1;
if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
b->net_plane, b->mtu, b->priority,
b->window, mod(tipc_net(net)->random),
- tipc_own_addr(net), onode,
+ tipc_own_addr(net), addr, peer_id,
n->capabilities,
tipc_bc_sndlink(n->net), n->bc_entry.link,
&le->inputq,
@@ -887,11 +954,9 @@ void tipc_node_delete_links(struct net *net, int bearer_id)
static void tipc_node_reset_links(struct tipc_node *n)
{
- char addr_string[16];
int i;
- pr_warn("Resetting all links to %s\n",
- tipc_addr_string_fill(addr_string, n->addr));
+ pr_warn("Resetting all links to %x\n", n->addr);
for (i = 0; i < MAX_BEARERS; i++) {
tipc_node_link_down(n, i, false);
@@ -1078,15 +1143,13 @@ illegal_evt:
static void node_lost_contact(struct tipc_node *n,
struct sk_buff_head *inputq)
{
- char addr_string[16];
struct tipc_sock_conn *conn, *safe;
struct tipc_link *l;
struct list_head *conns = &n->conn_sks;
struct sk_buff *skb;
uint i;
- pr_debug("Lost contact with %s\n",
- tipc_addr_string_fill(addr_string, n->addr));
+ pr_debug("Lost contact with %x\n", n->addr);
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
@@ -1618,6 +1681,30 @@ discard:
kfree_skb(skb);
}
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+{
+ struct tipc_net *tn = tipc_net(net);
+ int bearer_id = b->identity;
+ struct sk_buff_head xmitq;
+ struct tipc_link_entry *e;
+ struct tipc_node *n;
+
+ __skb_queue_head_init(&xmitq);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_write_lock(n);
+ e = &n->links[bearer_id];
+ if (e->link)
+ tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+ tipc_node_write_unlock(n);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
+ }
+
+ rcu_read_unlock();
+}
+
int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index acd58d23a70e..f24b83500df1 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -49,22 +49,25 @@ enum {
TIPC_BCAST_STATE_NACK = (1 << 2),
TIPC_BLOCK_FLOWCTL = (1 << 3),
TIPC_BCAST_RCAST = (1 << 4),
- TIPC_MCAST_GROUPS = (1 << 5)
+ TIPC_NODE_ID128 = (1 << 5)
};
#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
TIPC_BCAST_STATE_NACK | \
TIPC_BCAST_RCAST | \
- TIPC_BLOCK_FLOWCTL)
+ TIPC_BLOCK_FLOWCTL | \
+ TIPC_NODE_ID128)
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
-void tipc_node_check_dest(struct net *net, u32 onode,
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
+void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
struct tipc_bearer *bearer,
u16 capabilities, u32 signature,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
diff --git a/net/tipc/server.c b/net/tipc/server.c
deleted file mode 100644
index df0c563c90cd..000000000000
--- a/net/tipc/server.c
+++ /dev/null
@@ -1,710 +0,0 @@
-/*
- * net/tipc/server.c: TIPC server infrastructure
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "server.h"
-#include "core.h"
-#include "socket.h"
-#include "addr.h"
-#include "msg.h"
-#include <net/sock.h>
-#include <linux/module.h>
-
-/* Number of messages to send before rescheduling */
-#define MAX_SEND_MSG_COUNT 25
-#define MAX_RECV_MSG_COUNT 25
-#define CF_CONNECTED 1
-#define CF_SERVER 2
-
-#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
-
-/**
- * struct tipc_conn - TIPC connection structure
- * @kref: reference counter to connection object
- * @conid: connection identifier
- * @sock: socket handler associated with connection
- * @flags: indicates connection state
- * @server: pointer to connected server
- * @rwork: receive work item
- * @usr_data: user-specified field
- * @rx_action: what to do when connection socket is active
- * @outqueue: pointer to first outbound message in queue
- * @outqueue_lock: control access to the outqueue
- * @outqueue: list of connection objects for its server
- * @swork: send work item
- */
-struct tipc_conn {
- struct kref kref;
- int conid;
- struct socket *sock;
- unsigned long flags;
- struct tipc_server *server;
- struct work_struct rwork;
- int (*rx_action) (struct tipc_conn *con);
- void *usr_data;
- struct list_head outqueue;
- spinlock_t outqueue_lock;
- struct work_struct swork;
-};
-
-/* An entry waiting to be sent */
-struct outqueue_entry {
- struct list_head list;
- struct kvec iov;
- struct sockaddr_tipc dest;
-};
-
-static void tipc_recv_work(struct work_struct *work);
-static void tipc_send_work(struct work_struct *work);
-static void tipc_clean_outqueues(struct tipc_conn *con);
-
-static void tipc_conn_kref_release(struct kref *kref)
-{
- struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct tipc_server *s = con->server;
- struct sockaddr_tipc *saddr = s->saddr;
- struct socket *sock = con->sock;
- struct sock *sk;
-
- if (sock) {
- sk = sock->sk;
- if (test_bit(CF_SERVER, &con->flags)) {
- __module_get(sock->ops->owner);
- __module_get(sk->sk_prot_creator->owner);
- }
- saddr->scope = -TIPC_NODE_SCOPE;
- kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
- sock_release(sock);
- con->sock = NULL;
- }
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
- tipc_clean_outqueues(con);
- kfree(con);
-}
-
-static void conn_put(struct tipc_conn *con)
-{
- kref_put(&con->kref, tipc_conn_kref_release);
-}
-
-static void conn_get(struct tipc_conn *con)
-{
- kref_get(&con->kref);
-}
-
-static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- spin_lock_bh(&s->idr_lock);
- con = idr_find(&s->conn_idr, conid);
- if (con) {
- if (!test_bit(CF_CONNECTED, &con->flags) ||
- !kref_get_unless_zero(&con->kref))
- con = NULL;
- }
- spin_unlock_bh(&s->idr_lock);
- return con;
-}
-
-static void sock_data_ready(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock_bh(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->rcv_wq, &con->rwork))
- conn_put(con);
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void sock_write_space(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock_bh(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->send_wq, &con->swork))
- conn_put(con);
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
-{
- struct sock *sk = sock->sk;
-
- write_lock_bh(&sk->sk_callback_lock);
-
- sk->sk_data_ready = sock_data_ready;
- sk->sk_write_space = sock_write_space;
- sk->sk_user_data = con;
-
- con->sock = sock;
-
- write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_close_conn(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct sock *sk = con->sock->sk;
- bool disconnect = false;
-
- write_lock_bh(&sk->sk_callback_lock);
- disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
- if (disconnect) {
- sk->sk_user_data = NULL;
- if (con->conid)
- s->tipc_conn_release(con->conid, con->usr_data);
- }
- write_unlock_bh(&sk->sk_callback_lock);
-
- /* Handle concurrent calls from sending and receiving threads */
- if (!disconnect)
- return;
-
- /* Don't flush pending works, -just let them expire */
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
- conn_put(con);
-}
-
-static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int ret;
-
- con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
- if (!con)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&con->kref);
- INIT_LIST_HEAD(&con->outqueue);
- spin_lock_init(&con->outqueue_lock);
- INIT_WORK(&con->swork, tipc_send_work);
- INIT_WORK(&con->rwork, tipc_recv_work);
-
- spin_lock_bh(&s->idr_lock);
- ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
- if (ret < 0) {
- kfree(con);
- spin_unlock_bh(&s->idr_lock);
- return ERR_PTR(-ENOMEM);
- }
- con->conid = ret;
- s->idr_in_use++;
- spin_unlock_bh(&s->idr_lock);
-
- set_bit(CF_CONNECTED, &con->flags);
- con->server = s;
-
- return con;
-}
-
-static int tipc_receive_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct sock *sk = con->sock->sk;
- struct sockaddr_tipc addr;
- struct msghdr msg = {};
- struct kvec iov;
- void *buf;
- int ret;
-
- buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
- if (!buf) {
- ret = -ENOMEM;
- goto out_close;
- }
-
- iov.iov_base = buf;
- iov.iov_len = s->max_rcvbuf_size;
- msg.msg_name = &addr;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
- ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
- if (ret <= 0) {
- kmem_cache_free(s->rcvbuf_cache, buf);
- goto out_close;
- }
-
- read_lock_bh(&sk->sk_callback_lock);
- if (test_bit(CF_CONNECTED, &con->flags))
- ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
- &addr, con->usr_data, buf, ret);
- read_unlock_bh(&sk->sk_callback_lock);
- kmem_cache_free(s->rcvbuf_cache, buf);
- if (ret < 0)
- tipc_conn_terminate(s, con->conid);
- return ret;
-
-out_close:
- if (ret != -EWOULDBLOCK)
- tipc_close_conn(con);
- else if (ret == 0)
- /* Don't return success if we really got EOF */
- ret = -EAGAIN;
-
- return ret;
-}
-
-static int tipc_accept_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = con->sock;
- struct socket *newsock;
- struct tipc_conn *newcon;
- int ret;
-
- ret = kernel_accept(sock, &newsock, O_NONBLOCK);
- if (ret < 0)
- return ret;
-
- newcon = tipc_alloc_conn(con->server);
- if (IS_ERR(newcon)) {
- ret = PTR_ERR(newcon);
- sock_release(newsock);
- return ret;
- }
-
- newcon->rx_action = tipc_receive_from_sock;
- tipc_register_callbacks(newsock, newcon);
-
- /* Notify that new connection is incoming */
- newcon->usr_data = s->tipc_conn_new(newcon->conid);
- if (!newcon->usr_data) {
- sock_release(newsock);
- conn_put(newcon);
- return -ENOMEM;
- }
-
- /* Wake up receive process in case of 'SYN+' message */
- newsock->sk->sk_data_ready(newsock->sk);
- return ret;
-}
-
-static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = NULL;
- int ret;
-
- ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
- if (ret < 0)
- return NULL;
- ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&s->imp, sizeof(s->imp));
- if (ret < 0)
- goto create_err;
- ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
- if (ret < 0)
- goto create_err;
-
- switch (s->type) {
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- con->rx_action = tipc_accept_from_sock;
-
- ret = kernel_listen(sock, 0);
- if (ret < 0)
- goto create_err;
- break;
- case SOCK_DGRAM:
- case SOCK_RDM:
- con->rx_action = tipc_receive_from_sock;
- break;
- default:
- pr_err("Unknown socket type %d\n", s->type);
- goto create_err;
- }
-
- /* As server's listening socket owner and creator is the same module,
- * we have to decrease TIPC module reference count to guarantee that
- * it remains zero after the server socket is created, otherwise,
- * executing "rmmod" command is unable to make TIPC module deleted
- * after TIPC module is inserted successfully.
- *
- * However, the reference count is ever increased twice in
- * sock_create_kern(): one is to increase the reference count of owner
- * of TIPC socket's proto_ops struct; another is to increment the
- * reference count of owner of TIPC proto struct. Therefore, we must
- * decrement the module reference count twice to ensure that it keeps
- * zero after server's listening socket is created. Of course, we
- * must bump the module reference count twice as well before the socket
- * is closed.
- */
- module_put(sock->ops->owner);
- module_put(sock->sk->sk_prot_creator->owner);
- set_bit(CF_SERVER, &con->flags);
-
- return sock;
-
-create_err:
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sock_release(sock);
- return NULL;
-}
-
-static int tipc_open_listening_sock(struct tipc_server *s)
-{
- struct socket *sock;
- struct tipc_conn *con;
-
- con = tipc_alloc_conn(s);
- if (IS_ERR(con))
- return PTR_ERR(con);
-
- sock = tipc_create_listen_sock(con);
- if (!sock) {
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- kfree(con);
- return -EINVAL;
- }
-
- tipc_register_callbacks(sock, con);
- return 0;
-}
-
-static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
-{
- struct outqueue_entry *entry;
- void *buf;
-
- entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
- if (!entry)
- return NULL;
-
- buf = kmemdup(data, len, GFP_ATOMIC);
- if (!buf) {
- kfree(entry);
- return NULL;
- }
-
- entry->iov.iov_base = buf;
- entry->iov.iov_len = len;
-
- return entry;
-}
-
-static void tipc_free_entry(struct outqueue_entry *e)
-{
- kfree(e->iov.iov_base);
- kfree(e);
-}
-
-static void tipc_clean_outqueues(struct tipc_conn *con)
-{
- struct outqueue_entry *e, *safe;
-
- spin_lock_bh(&con->outqueue_lock);
- list_for_each_entry_safe(e, safe, &con->outqueue, list) {
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-}
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len)
-{
- struct outqueue_entry *e;
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (!con)
- return -EINVAL;
-
- if (!test_bit(CF_CONNECTED, &con->flags)) {
- conn_put(con);
- return 0;
- }
-
- e = tipc_alloc_entry(data, len);
- if (!e) {
- conn_put(con);
- return -ENOMEM;
- }
-
- if (addr)
- memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
-
- spin_lock_bh(&con->outqueue_lock);
- list_add_tail(&e->list, &con->outqueue);
- spin_unlock_bh(&con->outqueue_lock);
-
- if (!queue_work(s->send_wq, &con->swork))
- conn_put(con);
- return 0;
-}
-
-void tipc_conn_terminate(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (con) {
- tipc_close_conn(con);
- conn_put(con);
- }
-}
-
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
- u32 upper, u32 filter, int *conid)
-{
- struct tipc_subscriber *scbr;
- struct tipc_subscr sub;
- struct tipc_server *s;
- struct tipc_conn *con;
-
- sub.seq.type = type;
- sub.seq.lower = lower;
- sub.seq.upper = upper;
- sub.timeout = TIPC_WAIT_FOREVER;
- sub.filter = filter;
- *(u32 *)&sub.usr_handle = port;
-
- con = tipc_alloc_conn(tipc_topsrv(net));
- if (IS_ERR(con))
- return false;
-
- *conid = con->conid;
- s = con->server;
- scbr = s->tipc_conn_new(*conid);
- if (!scbr) {
- conn_put(con);
- return false;
- }
-
- con->usr_data = scbr;
- con->sock = NULL;
- s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
- return true;
-}
-
-void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
-{
- struct tipc_conn *con;
- struct tipc_server *srv;
-
- con = tipc_conn_lookup(tipc_topsrv(net), conid);
- if (!con)
- return;
-
- test_and_clear_bit(CF_CONNECTED, &con->flags);
- srv = con->server;
- if (con->conid)
- srv->tipc_conn_release(con->conid, con->usr_data);
- conn_put(con);
- conn_put(con);
-}
-
-static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
-{
- u32 port = *(u32 *)&evt->s.usr_handle;
- u32 self = tipc_own_addr(net);
- struct sk_buff_head evtq;
- struct sk_buff *skb;
-
- skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
- self, self, port, port, 0);
- if (!skb)
- return;
- msg_set_dest_droppable(buf_msg(skb), true);
- memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
- skb_queue_head_init(&evtq);
- __skb_queue_tail(&evtq, skb);
- tipc_sk_rcv(net, &evtq);
-}
-
-static void tipc_send_to_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct outqueue_entry *e;
- struct tipc_event *evt;
- struct msghdr msg;
- int count = 0;
- int ret;
-
- spin_lock_bh(&con->outqueue_lock);
- while (test_bit(CF_CONNECTED, &con->flags)) {
- e = list_entry(con->outqueue.next, struct outqueue_entry, list);
- if ((struct list_head *) e == &con->outqueue)
- break;
-
- spin_unlock_bh(&con->outqueue_lock);
-
- if (con->sock) {
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
- if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
- msg.msg_name = &e->dest;
- msg.msg_namelen = sizeof(struct sockaddr_tipc);
- }
- ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
- e->iov.iov_len);
- if (ret == -EWOULDBLOCK || ret == 0) {
- cond_resched();
- goto out;
- } else if (ret < 0) {
- goto send_err;
- }
- } else {
- evt = e->iov.iov_base;
- tipc_send_kern_top_evt(s->net, evt);
- }
- /* Don't starve users filling buffers */
- if (++count >= MAX_SEND_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
-
- spin_lock_bh(&con->outqueue_lock);
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-out:
- return;
-
-send_err:
- tipc_close_conn(con);
-}
-
-static void tipc_recv_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
- int count = 0;
-
- while (test_bit(CF_CONNECTED, &con->flags)) {
- if (con->rx_action(con))
- break;
-
- /* Don't flood Rx machine */
- if (++count >= MAX_RECV_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
- }
- conn_put(con);
-}
-
-static void tipc_send_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
-
- if (test_bit(CF_CONNECTED, &con->flags))
- tipc_send_to_sock(con);
-
- conn_put(con);
-}
-
-static void tipc_work_stop(struct tipc_server *s)
-{
- destroy_workqueue(s->rcv_wq);
- destroy_workqueue(s->send_wq);
-}
-
-static int tipc_work_start(struct tipc_server *s)
-{
- s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
- if (!s->rcv_wq) {
- pr_err("can't start tipc receive workqueue\n");
- return -ENOMEM;
- }
-
- s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
- if (!s->send_wq) {
- pr_err("can't start tipc send workqueue\n");
- destroy_workqueue(s->rcv_wq);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-int tipc_server_start(struct tipc_server *s)
-{
- int ret;
-
- spin_lock_init(&s->idr_lock);
- idr_init(&s->conn_idr);
- s->idr_in_use = 0;
-
- s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!s->rcvbuf_cache)
- return -ENOMEM;
-
- ret = tipc_work_start(s);
- if (ret < 0) {
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- ret = tipc_open_listening_sock(s);
- if (ret < 0) {
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- return ret;
-}
-
-void tipc_server_stop(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int id;
-
- spin_lock_bh(&s->idr_lock);
- for (id = 0; s->idr_in_use; id++) {
- con = idr_find(&s->conn_idr, id);
- if (con) {
- spin_unlock_bh(&s->idr_lock);
- tipc_close_conn(con);
- spin_lock_bh(&s->idr_lock);
- }
- }
- spin_unlock_bh(&s->idr_lock);
-
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- idr_destroy(&s->conn_idr);
-}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b0323ec7971e..1fd1c8b5ce03 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -289,10 +289,9 @@ static bool tipc_sk_type_connectionless(struct sock *sk)
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
struct sock *sk = &tsk->sk;
- struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
+ u32 self = tipc_own_addr(sock_net(sk));
u32 peer_port = tsk_peer_port(tsk);
- u32 orig_node;
- u32 peer_node;
+ u32 orig_node, peer_node;
if (unlikely(!tipc_sk_connected(sk)))
return false;
@@ -306,10 +305,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
if (likely(orig_node == peer_node))
return true;
- if (!orig_node && (peer_node == tn->own_addr))
+ if (!orig_node && peer_node == self)
return true;
- if (!peer_node && (orig_node == tn->own_addr))
+ if (!peer_node && orig_node == self)
return true;
return false;
@@ -461,8 +460,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
/* Ensure tsk is visible before we read own_addr. */
smp_mb();
- tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
- NAMED_H_SIZE, 0);
+ tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
+ TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
msg_set_origport(msg, tsk->portid);
timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
@@ -473,6 +472,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
+ tsk->group_is_open = true;
atomic_set(&tsk->dupl_rcvcnt, 0);
/* Start out with safe limits until we receive an advertised window */
@@ -643,7 +643,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
goto exit;
}
- res = (addr->scope > 0) ?
+ res = (addr->scope >= 0) ?
tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
@@ -665,12 +665,11 @@ exit:
* a completely predictable manner).
*/
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
memset(addr, 0, sizeof(*addr));
if (peer) {
@@ -681,16 +680,15 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
addr->addr.id.node = tsk_peer_node(tsk);
} else {
addr->addr.id.ref = tsk->portid;
- addr->addr.id.node = tn->own_addr;
+ addr->addr.id.node = tipc_own_addr(sock_net(sk));
}
- *uaddr_len = sizeof(*addr);
addr->addrtype = TIPC_ADDR_ID;
addr->family = AF_TIPC;
addr->scope = 0;
addr->addr.name.domain = 0;
- return 0;
+ return sizeof(*addr);
}
/**
@@ -1280,8 +1278,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
- u32 type, inst, domain;
u32 dnode, dport;
+ u32 type, inst;
int mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
@@ -1332,13 +1330,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (dest->addrtype == TIPC_ADDR_NAME) {
type = dest->addr.name.name.type;
inst = dest->addr.name.name.instance;
- domain = dest->addr.name.domain;
- dnode = domain;
+ dnode = dest->addr.name.domain;
msg_set_type(hdr, TIPC_NAMED_MSG);
msg_set_hdr_sz(hdr, NAMED_H_SIZE);
msg_set_nametype(hdr, type);
msg_set_nameinst(hdr, inst);
- msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
+ msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
dport = tipc_nametbl_translate(net, type, inst, &dnode);
msg_set_destnode(hdr, dnode);
msg_set_destport(hdr, dport);
@@ -2123,8 +2120,10 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
(!sk_conn && msg_connected(hdr)) ||
(!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
- else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+ else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
+ atomic_inc(&sk->sk_drops);
err = TIPC_ERR_OVERLOAD;
+ }
if (unlikely(err)) {
tipc_skb_reject(net, err, skb, xmitq);
@@ -2203,6 +2202,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/* Overload => reject message back to sender */
onode = tipc_own_addr(sock_net(sk));
+ atomic_inc(&sk->sk_drops);
if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
__skb_queue_tail(xmitq, skb);
break;
@@ -2592,6 +2592,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct publication *publ;
u32 key;
+ if (scope != TIPC_NODE_SCOPE)
+ scope = TIPC_CLUSTER_SCOPE;
+
if (tipc_sk_connected(sk))
return -EINVAL;
key = tsk->portid + tsk->pub_count + 1;
@@ -2603,7 +2606,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
if (unlikely(!publ))
return -EINVAL;
- list_add(&publ->pport_list, &tsk->publications);
+ list_add(&publ->binding_sock, &tsk->publications);
tsk->pub_count++;
tsk->published = 1;
return 0;
@@ -2617,7 +2620,10 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
struct publication *safe;
int rc = -EINVAL;
- list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
+ if (scope != TIPC_NODE_SCOPE)
+ scope = TIPC_CLUSTER_SCOPE;
+
+ list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
if (seq) {
if (publ->scope != scope)
continue;
@@ -2628,12 +2634,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
if (publ->upper != seq->upper)
break;
tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->ref, publ->key);
+ publ->upper, publ->key);
rc = 0;
break;
}
tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->ref, publ->key);
+ publ->upper, publ->key);
rc = 0;
}
if (list_empty(&tsk->publications))
@@ -2659,8 +2665,8 @@ void tipc_sk_reinit(struct net *net)
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
spin_lock_bh(&tsk->sk.sk_lock.slock);
msg = &tsk->phdr;
- msg_set_prevnode(msg, tn->own_addr);
- msg_set_orignode(msg, tn->own_addr);
+ msg_set_prevnode(msg, tipc_own_addr(net));
+ msg_set_orignode(msg, tipc_own_addr(net));
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
@@ -3155,16 +3161,32 @@ msg_full:
return -EMSGSIZE;
}
+static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
+ *tsk)
+{
+ struct net *net = sock_net(skb->sk);
+ struct sock *sk = &tsk->sk;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net)))
+ return -EMSGSIZE;
+
+ if (tipc_sk_connected(sk)) {
+ if (__tipc_nl_add_sk_con(skb, tsk))
+ return -EMSGSIZE;
+ } else if (!list_empty(&tsk->publications)) {
+ if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
struct tipc_sock *tsk)
{
- int err;
- void *hdr;
struct nlattr *attrs;
- struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sock *sk = &tsk->sk;
+ void *hdr;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
@@ -3174,19 +3196,10 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
if (!attrs)
goto genlmsg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
- goto attr_msg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
+
+ if (__tipc_nl_add_sk_info(skb, tsk))
goto attr_msg_cancel;
- if (tipc_sk_connected(sk)) {
- err = __tipc_nl_add_sk_con(skb, tsk);
- if (err)
- goto attr_msg_cancel;
- } else if (!list_empty(&tsk->publications)) {
- if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
- goto attr_msg_cancel;
- }
nla_nest_end(skb, attrs);
genlmsg_end(skb, hdr);
@@ -3200,16 +3213,19 @@ msg_cancel:
return -EMSGSIZE;
}
-int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*skb_handler)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk))
{
- int err;
- struct tipc_sock *tsk;
- const struct bucket_table *tbl;
- struct rhash_head *pos;
struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- u32 tbl_id = cb->args[0];
+ struct tipc_net *tn = tipc_net(net);
+ const struct bucket_table *tbl;
u32 prev_portid = cb->args[1];
+ u32 tbl_id = cb->args[0];
+ struct rhash_head *pos;
+ struct tipc_sock *tsk;
+ int err;
rcu_read_lock();
tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
@@ -3221,12 +3237,13 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
- err = __tipc_nl_add_sk(skb, cb, tsk);
+ err = skb_handler(skb, cb, tsk);
if (err) {
prev_portid = tsk->portid;
spin_unlock_bh(&tsk->sk.sk_lock.slock);
goto out;
}
+
prev_portid = 0;
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
@@ -3238,6 +3255,76 @@ out:
return skb->len;
}
+EXPORT_SYMBOL(tipc_nl_sk_walk);
+
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tipc_sock *tsk, u32 sk_filter_state,
+ u64 (*tipc_diag_gen_cookie)(struct sock *sk))
+{
+ struct sock *sk = &tsk->sk;
+ struct nlattr *attrs;
+ struct nlattr *stat;
+
+ /*filter response w.r.t sk_state*/
+ if (!(sk_filter_state & (1 << sk->sk_state)))
+ return 0;
+
+ attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+ if (!attrs)
+ goto msg_cancel;
+
+ if (__tipc_nl_add_sk_info(skb, tsk))
+ goto attr_msg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_UID,
+ from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk),
+ sock_i_uid(sk))) ||
+ nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
+ tipc_diag_gen_cookie(sk),
+ TIPC_NLA_SOCK_PAD))
+ goto attr_msg_cancel;
+
+ stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
+ if (!stat)
+ goto attr_msg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
+ skb_queue_len(&sk->sk_receive_queue)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
+ skb_queue_len(&sk->sk_write_queue)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
+ atomic_read(&sk->sk_drops)))
+ goto stat_msg_cancel;
+
+ if (tsk->cong_link_cnt &&
+ nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
+ goto stat_msg_cancel;
+
+ if (tsk_conn_cong(tsk) &&
+ nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
+ goto stat_msg_cancel;
+
+ nla_nest_end(skb, stat);
+ nla_nest_end(skb, attrs);
+
+ return 0;
+
+stat_msg_cancel:
+ nla_nest_cancel(skb, stat);
+attr_msg_cancel:
+ nla_nest_cancel(skb, attrs);
+msg_cancel:
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
+
+int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
+}
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
@@ -3287,7 +3374,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
struct publication *p;
if (*last_publ) {
- list_for_each_entry(p, &tsk->publications, pport_list) {
+ list_for_each_entry(p, &tsk->publications, binding_sock) {
if (p->key == *last_publ)
break;
}
@@ -3304,10 +3391,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
}
} else {
p = list_first_entry(&tsk->publications, struct publication,
- pport_list);
+ binding_sock);
}
- list_for_each_entry_from(p, &tsk->publications, pport_list) {
+ list_for_each_entry_from(p, &tsk->publications, binding_sock) {
err = __tipc_nl_add_sk_publ(skb, cb, p);
if (err) {
*last_publ = p->key;
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 06fb5944cf76..aff9b2ae5a1f 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -49,6 +49,8 @@
#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2)
#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16)
+struct tipc_sock;
+
int tipc_socket_init(void);
void tipc_socket_stop(void);
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
@@ -59,5 +61,11 @@ int tipc_sk_rht_init(struct net *net);
void tipc_sk_rht_destroy(struct net *net);
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tipc_sock *tsk, u32 sk_filter_state,
+ u64 (*tipc_diag_gen_cookie)(struct sock *sk));
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*skb_handler)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk));
#endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 68e26470c516..b7d80bc5f4ab 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -1,7 +1,7 @@
/*
* net/tipc/subscr.c: TIPC network topology service
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2017, Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -38,61 +38,30 @@
#include "name_table.h"
#include "subscr.h"
-/**
- * struct tipc_subscriber - TIPC network topology subscriber
- * @kref: reference counter to tipc_subscription object
- * @conid: connection identifier to server connecting to subscriber
- * @lock: control access to subscriber
- * @subscrp_list: list of subscription objects for this subscriber
- */
-struct tipc_subscriber {
- struct kref kref;
- int conid;
- spinlock_t lock;
- struct list_head subscrp_list;
-};
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-static u32 htohl(u32 in, int swap)
-{
- return swap ? swab32(in) : in;
-}
-
-static void tipc_subscrp_send_event(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper,
- u32 event, u32 port_ref, u32 node)
+static void tipc_sub_send_event(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node)
{
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- struct tipc_subscriber *subscriber = sub->subscriber;
- struct kvec msg_sect;
+ struct tipc_event *evt = &sub->evt;
- msg_sect.iov_base = (void *)&sub->evt;
- msg_sect.iov_len = sizeof(struct tipc_event);
- sub->evt.event = htohl(event, sub->swap);
- sub->evt.found_lower = htohl(found_lower, sub->swap);
- sub->evt.found_upper = htohl(found_upper, sub->swap);
- sub->evt.port.ref = htohl(port_ref, sub->swap);
- sub->evt.port.node = htohl(node, sub->swap);
- tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL,
- msg_sect.iov_base, msg_sect.iov_len);
+ if (sub->inactive)
+ return;
+ tipc_evt_write(evt, event, event);
+ tipc_evt_write(evt, found_lower, found_lower);
+ tipc_evt_write(evt, found_upper, found_upper);
+ tipc_evt_write(evt, port.ref, port);
+ tipc_evt_write(evt, port.node, node);
+ tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
}
/**
- * tipc_subscrp_check_overlap - test for subscription overlap with the
+ * tipc_sub_check_overlap - test for subscription overlap with the
* given values
*
* Returns 1 if there is overlap, otherwise 0.
*/
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper)
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+ u32 found_upper)
{
if (found_lower < seq->lower)
found_lower = seq->lower;
@@ -103,298 +72,100 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
return 1;
}
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
-{
- return htohl(type, swap);
-}
-
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
- struct tipc_name_seq *out)
-{
- out->type = htohl(in->type, swap);
- out->lower = htohl(in->lower, swap);
- out->upper = htohl(in->upper, swap);
-}
-
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node, u32 scope, int must)
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node,
+ u32 scope, int must)
{
- u32 filter = htohl(sub->evt.s.filter, sub->swap);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_name_seq seq;
- tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
- if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
+ seq.type = tipc_sub_read(s, seq.type);
+ seq.lower = tipc_sub_read(s, seq.lower);
+ seq.upper = tipc_sub_read(s, seq.upper);
+
+ if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
return;
+
if (!must && !(filter & TIPC_SUB_PORTS))
return;
if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
return;
if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
return;
-
- tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
- node);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, found_lower, found_upper,
+ event, port, node);
+ spin_unlock(&sub->lock);
}
-static void tipc_subscrp_timeout(struct timer_list *t)
+static void tipc_sub_timeout(struct timer_list *t)
{
struct tipc_subscription *sub = from_timer(sub, t, timer);
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- spin_lock_bh(&subscriber->lock);
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- spin_unlock_bh(&subscriber->lock);
-
- /* Notify subscriber of timeout */
- tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
- TIPC_SUBSCR_TIMEOUT, 0, 0);
-
- tipc_subscrp_put(sub);
-}
-
-static void tipc_subscrb_kref_release(struct kref *kref)
-{
- kfree(container_of(kref,struct tipc_subscriber, kref));
-}
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
-{
- kref_put(&subscriber->kref, tipc_subscrb_kref_release);
-}
+ struct tipc_subscr *s = &sub->evt.s;
-static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
-{
- kref_get(&subscriber->kref);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
+ TIPC_SUBSCR_TIMEOUT, 0, 0);
+ sub->inactive = true;
+ spin_unlock(&sub->lock);
}
-static void tipc_subscrp_kref_release(struct kref *kref)
+static void tipc_sub_kref_release(struct kref *kref)
{
- struct tipc_subscription *sub = container_of(kref,
- struct tipc_subscription,
- kref);
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- atomic_dec(&tn->subscription_count);
- kfree(sub);
- tipc_subscrb_put(subscriber);
+ kfree(container_of(kref, struct tipc_subscription, kref));
}
-void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_sub_put(struct tipc_subscription *subscription)
{
- kref_put(&subscription->kref, tipc_subscrp_kref_release);
+ kref_put(&subscription->kref, tipc_sub_kref_release);
}
-void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_sub_get(struct tipc_subscription *subscription)
{
kref_get(&subscription->kref);
}
-/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
- * subscriptions for a given subscriber.
- */
-static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
- struct tipc_subscr *s)
-{
- struct list_head *subscription_list = &subscriber->subscrp_list;
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
- if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
- continue;
-
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- tipc_subscrp_put(sub);
- }
-
- if (s)
- break;
- }
- spin_unlock_bh(&subscriber->lock);
-}
-
-static struct tipc_subscriber *tipc_subscrb_create(int conid)
-{
- struct tipc_subscriber *subscriber;
-
- subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
- if (!subscriber) {
- pr_warn("Subscriber rejected, no memory\n");
- return NULL;
- }
- INIT_LIST_HEAD(&subscriber->subscrp_list);
- kref_init(&subscriber->kref);
- subscriber->conid = conid;
- spin_lock_init(&subscriber->lock);
-
- return subscriber;
-}
-
-static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
-{
- tipc_subscrb_subscrp_delete(subscriber, NULL);
- tipc_subscrb_put(subscriber);
-}
-
-static void tipc_subscrp_cancel(struct tipc_subscr *s,
- struct tipc_subscriber *subscriber)
-{
- tipc_subscrb_get(subscriber);
- tipc_subscrb_subscrp_delete(subscriber, s);
- tipc_subscrb_put(subscriber);
-}
-
-static struct tipc_subscription *tipc_subscrp_create(struct net *net,
- struct tipc_subscr *s,
- int swap)
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_subscription *sub;
- u32 filter = htohl(s->filter, swap);
+ u32 timeout;
- /* Refuse subscription if global limit exceeded */
- if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
- pr_warn("Subscription rejected, limit reached (%u)\n",
- TIPC_MAX_SUBSCRIPTIONS);
+ if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
+ (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+ pr_warn("Subscription rejected, illegal request\n");
return NULL;
}
-
- /* Allocate subscription object */
sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
if (!sub) {
pr_warn("Subscription rejected, no memory\n");
return NULL;
}
-
- /* Initialize subscription object */
+ INIT_LIST_HEAD(&sub->service_list);
+ INIT_LIST_HEAD(&sub->sub_list);
sub->net = net;
- if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
- (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
- pr_warn("Subscription rejected, illegal request\n");
- kfree(sub);
- return NULL;
- }
-
- sub->swap = swap;
+ sub->conid = conid;
+ sub->inactive = false;
memcpy(&sub->evt.s, s, sizeof(*s));
- atomic_inc(&tn->subscription_count);
+ spin_lock_init(&sub->lock);
kref_init(&sub->kref);
- return sub;
-}
-
-static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
- struct tipc_subscriber *subscriber, int swap,
- bool status)
-{
- struct tipc_subscription *sub = NULL;
- u32 timeout;
-
- sub = tipc_subscrp_create(net, s, swap);
- if (!sub)
- return -1;
-
- spin_lock_bh(&subscriber->lock);
- list_add(&sub->subscrp_list, &subscriber->subscrp_list);
- sub->subscriber = subscriber;
- tipc_nametbl_subscribe(sub, status);
- tipc_subscrb_get(subscriber);
- spin_unlock_bh(&subscriber->lock);
-
- timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
- timeout = htohl(sub->evt.s.timeout, swap);
-
+ tipc_nametbl_subscribe(sub);
+ timer_setup(&sub->timer, tipc_sub_timeout, 0);
+ timeout = tipc_sub_read(&sub->evt.s, timeout);
if (timeout != TIPC_WAIT_FOREVER)
mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
- return 0;
-}
-
-/* Handle one termination request for the subscriber */
-static void tipc_subscrb_release_cb(int conid, void *usr_data)
-{
- tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
-}
-
-/* Handle one request to create a new subscription for the subscriber */
-static int tipc_subscrb_rcv_cb(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len)
-{
- struct tipc_subscriber *subscriber = usr_data;
- struct tipc_subscr *s = (struct tipc_subscr *)buf;
- bool status;
- int swap;
-
- /* Determine subscriber's endianness */
- swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
- TIPC_SUB_CANCEL));
-
- /* Detect & process a subscription cancellation request */
- if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
- s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
- tipc_subscrp_cancel(s, subscriber);
- return 0;
- }
- status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
- return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
-}
-
-/* Handle one request to establish a new subscriber */
-static void *tipc_subscrb_connect_cb(int conid)
-{
- return (void *)tipc_subscrb_create(conid);
-}
-
-int tipc_topsrv_start(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- const char name[] = "topology_server";
- struct tipc_server *topsrv;
- struct sockaddr_tipc *saddr;
-
- saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
- if (!saddr)
- return -ENOMEM;
- saddr->family = AF_TIPC;
- saddr->addrtype = TIPC_ADDR_NAMESEQ;
- saddr->addr.nameseq.type = TIPC_TOP_SRV;
- saddr->addr.nameseq.lower = TIPC_TOP_SRV;
- saddr->addr.nameseq.upper = TIPC_TOP_SRV;
- saddr->scope = TIPC_NODE_SCOPE;
-
- topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC);
- if (!topsrv) {
- kfree(saddr);
- return -ENOMEM;
- }
- topsrv->net = net;
- topsrv->saddr = saddr;
- topsrv->imp = TIPC_CRITICAL_IMPORTANCE;
- topsrv->type = SOCK_SEQPACKET;
- topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr);
- topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb;
- topsrv->tipc_conn_new = tipc_subscrb_connect_cb;
- topsrv->tipc_conn_release = tipc_subscrb_release_cb;
-
- strncpy(topsrv->name, name, strlen(name) + 1);
- tn->topsrv = topsrv;
- atomic_set(&tn->subscription_count, 0);
-
- return tipc_server_start(topsrv);
+ return sub;
}
-void tipc_topsrv_stop(struct net *net)
+void tipc_sub_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_server *topsrv = tn->topsrv;
-
- tipc_server_stop(topsrv);
- kfree(topsrv->saddr);
- kfree(topsrv);
+ tipc_nametbl_unsubscribe(sub);
+ if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
+ del_timer_sync(&sub->timer);
+ list_del(&sub->sub_list);
+ tipc_sub_put(sub);
}
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index f3edca775d9f..d793b4343885 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -1,7 +1,7 @@
/*
* net/tipc/subscr.h: Include file for TIPC network topology service
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2017, Ericsson AB
* Copyright (c) 2005-2007, 2012-2013, Wind River Systems
* All rights reserved.
*
@@ -37,48 +37,72 @@
#ifndef _TIPC_SUBSCR_H
#define _TIPC_SUBSCR_H
-#include "server.h"
+#include "topsrv.h"
-#define TIPC_MAX_SUBSCRIPTIONS 65535
-#define TIPC_MAX_PUBLICATIONS 65535
+#define TIPC_MAX_SUBSCR 65535
+#define TIPC_MAX_PUBL 65535
struct tipc_subscription;
-struct tipc_subscriber;
+struct tipc_conn;
/**
* struct tipc_subscription - TIPC network topology subscription object
* @subscriber: pointer to its subscriber
* @seq: name sequence associated with subscription
- * @net: point to network namespace
* @timer: timer governing subscription duration (optional)
* @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @subscrp_list: adjacent subscriptions in subscriber's subscription list
- * @swap: indicates if subscriber uses opposite endianness in its messages
+ * @sub_list: adjacent subscriptions in subscriber's subscription list
* @evt: template for events generated by subscription
*/
struct tipc_subscription {
struct kref kref;
- struct tipc_subscriber *subscriber;
struct net *net;
struct timer_list timer;
- struct list_head nameseq_list;
- struct list_head subscrp_list;
- int swap;
+ struct list_head service_list;
+ struct list_head sub_list;
struct tipc_event evt;
+ int conid;
+ bool inactive;
+ spinlock_t lock; /* serialize up/down and timer events */
};
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper);
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper, u32 event,
- u32 port_ref, u32 node, u32 scope, int must);
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
- struct tipc_name_seq *out);
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid);
+void tipc_sub_unsubscribe(struct tipc_subscription *sub);
+
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+ u32 found_upper);
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node,
+ u32 scope, int must);
int tipc_topsrv_start(struct net *net);
void tipc_topsrv_stop(struct net *net);
-void tipc_subscrp_put(struct tipc_subscription *subscription);
-void tipc_subscrp_get(struct tipc_subscription *subscription);
+void tipc_sub_put(struct tipc_subscription *subscription);
+void tipc_sub_get(struct tipc_subscription *subscription);
+
+#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
+
+/* tipc_sub_read - return field_ of struct sub_ in host endian format
+ */
+#define tipc_sub_read(sub_, field_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = (sub__)->field_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (swap_ ? swab32(val__) : val__); \
+ })
+
+/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
+ */
+#define tipc_evt_write(evt_, field_, val_) \
+ ({ \
+ struct tipc_event *evt__ = evt_; \
+ u32 val__ = val_; \
+ int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \
+ (evt__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
#endif
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
new file mode 100644
index 000000000000..c8e34ef22c30
--- /dev/null
+++ b/net/tipc/topsrv.c
@@ -0,0 +1,703 @@
+/*
+ * net/tipc/server.c: TIPC server infrastructure
+ *
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017-2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "subscr.h"
+#include "topsrv.h"
+#include "core.h"
+#include "socket.h"
+#include "addr.h"
+#include "msg.h"
+#include <net/sock.h>
+#include <linux/module.h>
+
+/* Number of messages to send before rescheduling */
+#define MAX_SEND_MSG_COUNT 25
+#define MAX_RECV_MSG_COUNT 25
+#define CF_CONNECTED 1
+#define CF_SERVER 2
+
+#define TIPC_SERVER_NAME_LEN 32
+
+/**
+ * struct tipc_topsrv - TIPC server structure
+ * @conn_idr: identifier set of connection
+ * @idr_lock: protect the connection identifier set
+ * @idr_in_use: amount of allocated identifier entry
+ * @net: network namspace instance
+ * @rcvbuf_cache: memory cache of server receive buffer
+ * @rcv_wq: receive workqueue
+ * @send_wq: send workqueue
+ * @max_rcvbuf_size: maximum permitted receive message length
+ * @tipc_conn_new: callback will be called when new connection is incoming
+ * @tipc_conn_release: callback will be called before releasing the connection
+ * @tipc_conn_recvmsg: callback will be called when message arrives
+ * @name: server name
+ * @imp: message importance
+ * @type: socket type
+ */
+struct tipc_topsrv {
+ struct idr conn_idr;
+ spinlock_t idr_lock; /* for idr list */
+ int idr_in_use;
+ struct net *net;
+ struct work_struct awork;
+ struct workqueue_struct *rcv_wq;
+ struct workqueue_struct *send_wq;
+ int max_rcvbuf_size;
+ struct socket *listener;
+ char name[TIPC_SERVER_NAME_LEN];
+};
+
+/**
+ * struct tipc_conn - TIPC connection structure
+ * @kref: reference counter to connection object
+ * @conid: connection identifier
+ * @sock: socket handler associated with connection
+ * @flags: indicates connection state
+ * @server: pointer to connected server
+ * @sub_list: lsit to all pertaing subscriptions
+ * @sub_lock: lock protecting the subscription list
+ * @outqueue_lock: control access to the outqueue
+ * @rwork: receive work item
+ * @rx_action: what to do when connection socket is active
+ * @outqueue: pointer to first outbound message in queue
+ * @outqueue_lock: control access to the outqueue
+ * @swork: send work item
+ */
+struct tipc_conn {
+ struct kref kref;
+ int conid;
+ struct socket *sock;
+ unsigned long flags;
+ struct tipc_topsrv *server;
+ struct list_head sub_list;
+ spinlock_t sub_lock; /* for subscription list */
+ struct work_struct rwork;
+ struct list_head outqueue;
+ spinlock_t outqueue_lock; /* for outqueue */
+ struct work_struct swork;
+};
+
+/* An entry waiting to be sent */
+struct outqueue_entry {
+ bool inactive;
+ struct tipc_event evt;
+ struct list_head list;
+};
+
+static void tipc_conn_recv_work(struct work_struct *work);
+static void tipc_conn_send_work(struct work_struct *work);
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt);
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s);
+
+static bool connected(struct tipc_conn *con)
+{
+ return con && test_bit(CF_CONNECTED, &con->flags);
+}
+
+static void tipc_conn_kref_release(struct kref *kref)
+{
+ struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
+ struct tipc_topsrv *s = con->server;
+ struct outqueue_entry *e, *safe;
+
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
+ if (con->sock)
+ sock_release(con->sock);
+
+ spin_lock_bh(&con->outqueue_lock);
+ list_for_each_entry_safe(e, safe, &con->outqueue, list) {
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+ kfree(con);
+}
+
+static void conn_put(struct tipc_conn *con)
+{
+ kref_put(&con->kref, tipc_conn_kref_release);
+}
+
+static void conn_get(struct tipc_conn *con)
+{
+ kref_get(&con->kref);
+}
+
+static void tipc_conn_close(struct tipc_conn *con)
+{
+ struct sock *sk = con->sock->sk;
+ bool disconnect = false;
+
+ write_lock_bh(&sk->sk_callback_lock);
+ disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+
+ if (disconnect) {
+ sk->sk_user_data = NULL;
+ tipc_conn_delete_sub(con, NULL);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ /* Handle concurrent calls from sending and receiving threads */
+ if (!disconnect)
+ return;
+
+ /* Don't flush pending works, -just let them expire */
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
+
+ conn_put(con);
+}
+
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+{
+ struct tipc_conn *con;
+ int ret;
+
+ con = kzalloc(sizeof(*con), GFP_ATOMIC);
+ if (!con)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&con->kref);
+ INIT_LIST_HEAD(&con->outqueue);
+ INIT_LIST_HEAD(&con->sub_list);
+ spin_lock_init(&con->outqueue_lock);
+ spin_lock_init(&con->sub_lock);
+ INIT_WORK(&con->swork, tipc_conn_send_work);
+ INIT_WORK(&con->rwork, tipc_conn_recv_work);
+
+ spin_lock_bh(&s->idr_lock);
+ ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
+ if (ret < 0) {
+ kfree(con);
+ spin_unlock_bh(&s->idr_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+ con->conid = ret;
+ s->idr_in_use++;
+ spin_unlock_bh(&s->idr_lock);
+
+ set_bit(CF_CONNECTED, &con->flags);
+ con->server = s;
+
+ return con;
+}
+
+static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid)
+{
+ struct tipc_conn *con;
+
+ spin_lock_bh(&s->idr_lock);
+ con = idr_find(&s->conn_idr, conid);
+ if (!connected(con) || !kref_get_unless_zero(&con->kref))
+ con = NULL;
+ spin_unlock_bh(&s->idr_lock);
+ return con;
+}
+
+/* tipc_conn_delete_sub - delete a specific or all subscriptions
+ * for a given subscriber
+ */
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(con->server->net);
+ struct list_head *sub_list = &con->sub_list;
+ struct tipc_subscription *sub, *tmp;
+
+ spin_lock_bh(&con->sub_lock);
+ list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
+ if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
+ tipc_sub_unsubscribe(sub);
+ atomic_dec(&tn->subscription_count);
+ } else if (s) {
+ break;
+ }
+ }
+ spin_unlock_bh(&con->sub_lock);
+}
+
+static void tipc_conn_send_to_sock(struct tipc_conn *con)
+{
+ struct list_head *queue = &con->outqueue;
+ struct tipc_topsrv *srv = con->server;
+ struct outqueue_entry *e;
+ struct tipc_event *evt;
+ struct msghdr msg;
+ struct kvec iov;
+ int count = 0;
+ int ret;
+
+ spin_lock_bh(&con->outqueue_lock);
+
+ while (!list_empty(queue)) {
+ e = list_first_entry(queue, struct outqueue_entry, list);
+ evt = &e->evt;
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (e->inactive)
+ tipc_conn_delete_sub(con, &evt->s);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ iov.iov_base = evt;
+ iov.iov_len = sizeof(*evt);
+ msg.msg_name = NULL;
+
+ if (con->sock) {
+ ret = kernel_sendmsg(con->sock, &msg, &iov,
+ 1, sizeof(*evt));
+ if (ret == -EWOULDBLOCK || ret == 0) {
+ cond_resched();
+ return;
+ } else if (ret < 0) {
+ return tipc_conn_close(con);
+ }
+ } else {
+ tipc_topsrv_kern_evt(srv->net, evt);
+ }
+
+ /* Don't starve users filling buffers */
+ if (++count >= MAX_SEND_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ spin_lock_bh(&con->outqueue_lock);
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+}
+
+static void tipc_conn_send_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
+
+ if (connected(con))
+ tipc_conn_send_to_sock(con);
+
+ conn_put(con);
+}
+
+/* tipc_conn_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_send_work()->tipc_send_to_sock()
+ */
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct outqueue_entry *e;
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(srv, conid);
+ if (!con)
+ return;
+
+ if (!connected(con))
+ goto err;
+
+ e = kmalloc(sizeof(*e), GFP_ATOMIC);
+ if (!e)
+ goto err;
+ e->inactive = (event == TIPC_SUBSCR_TIMEOUT);
+ memcpy(&e->evt, evt, sizeof(*evt));
+ spin_lock_bh(&con->outqueue_lock);
+ list_add_tail(&e->list, &con->outqueue);
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (queue_work(srv->send_wq, &con->swork))
+ return;
+err:
+ conn_put(con);
+}
+
+/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN
+ * Indicates that there now is more space in the send buffer
+ * The queued work is launched into tipc_send_work()->tipc_conn_send_to_sock()
+ */
+static void tipc_conn_write_space(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->send_wq, &con->swork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
+ struct tipc_conn *con,
+ struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(srv->net);
+ struct tipc_subscription *sub;
+
+ if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+ tipc_conn_delete_sub(con, s);
+ return 0;
+ }
+ if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+ pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+ return -1;
+ }
+ sub = tipc_sub_subscribe(srv->net, s, con->conid);
+ if (!sub)
+ return -1;
+ atomic_inc(&tn->subscription_count);
+ spin_lock_bh(&con->sub_lock);
+ list_add(&sub->sub_list, &con->sub_list);
+ spin_unlock_bh(&con->sub_lock);
+ return 0;
+}
+
+static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
+{
+ struct tipc_topsrv *srv = con->server;
+ struct sock *sk = con->sock->sk;
+ struct msghdr msg = {};
+ struct tipc_subscr s;
+ struct kvec iov;
+ int ret;
+
+ iov.iov_base = &s;
+ iov.iov_len = sizeof(s);
+ msg.msg_name = NULL;
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+ ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
+ if (ret == -EWOULDBLOCK)
+ return -EWOULDBLOCK;
+ if (ret > 0) {
+ read_lock_bh(&sk->sk_callback_lock);
+ ret = tipc_conn_rcv_sub(srv, con, &s);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+ if (ret < 0)
+ tipc_conn_close(con);
+
+ return ret;
+}
+
+static void tipc_conn_recv_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
+ int count = 0;
+
+ while (connected(con)) {
+ if (tipc_conn_rcv_from_sock(con))
+ break;
+
+ /* Don't flood Rx machine */
+ if (++count >= MAX_RECV_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ }
+ conn_put(con);
+}
+
+/* tipc_conn_data_ready - interrupt callback indicating the socket has data
+ * The queued work is launched into tipc_recv_work()->tipc_conn_rcv_from_sock()
+ */
+static void tipc_conn_data_ready(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->rcv_wq, &con->rwork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void tipc_topsrv_accept(struct work_struct *work)
+{
+ struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
+ struct socket *lsock = srv->listener;
+ struct socket *newsock;
+ struct tipc_conn *con;
+ struct sock *newsk;
+ int ret;
+
+ while (1) {
+ ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
+ if (ret < 0)
+ return;
+ con = tipc_conn_alloc(srv);
+ if (IS_ERR(con)) {
+ ret = PTR_ERR(con);
+ sock_release(newsock);
+ return;
+ }
+ /* Register callbacks */
+ newsk = newsock->sk;
+ write_lock_bh(&newsk->sk_callback_lock);
+ newsk->sk_data_ready = tipc_conn_data_ready;
+ newsk->sk_write_space = tipc_conn_write_space;
+ newsk->sk_user_data = con;
+ con->sock = newsock;
+ write_unlock_bh(&newsk->sk_callback_lock);
+
+ /* Wake up receive process in case of 'SYN+' message */
+ newsk->sk_data_ready(newsk);
+ }
+}
+
+/* tipc_toprsv_listener_data_ready - interrupt callback with connection request
+ * The queued job is launched into tipc_topsrv_accept()
+ */
+static void tipc_topsrv_listener_data_ready(struct sock *sk)
+{
+ struct tipc_topsrv *srv;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ srv = sk->sk_user_data;
+ if (srv->listener)
+ queue_work(srv->rcv_wq, &srv->awork);
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
+{
+ int imp = TIPC_CRITICAL_IMPORTANCE;
+ struct socket *lsock = NULL;
+ struct sockaddr_tipc saddr;
+ struct sock *sk;
+ int rc;
+
+ rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock);
+ if (rc < 0)
+ return rc;
+
+ srv->listener = lsock;
+ sk = lsock->sk;
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = tipc_topsrv_listener_data_ready;
+ sk->sk_user_data = srv;
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
+ (char *)&imp, sizeof(imp));
+ if (rc < 0)
+ goto err;
+
+ saddr.family = AF_TIPC;
+ saddr.addrtype = TIPC_ADDR_NAMESEQ;
+ saddr.addr.nameseq.type = TIPC_TOP_SRV;
+ saddr.addr.nameseq.lower = TIPC_TOP_SRV;
+ saddr.addr.nameseq.upper = TIPC_TOP_SRV;
+ saddr.scope = TIPC_NODE_SCOPE;
+
+ rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (rc < 0)
+ goto err;
+ rc = kernel_listen(lsock, 0);
+ if (rc < 0)
+ goto err;
+
+ /* As server's listening socket owner and creator is the same module,
+ * we have to decrease TIPC module reference count to guarantee that
+ * it remains zero after the server socket is created, otherwise,
+ * executing "rmmod" command is unable to make TIPC module deleted
+ * after TIPC module is inserted successfully.
+ *
+ * However, the reference count is ever increased twice in
+ * sock_create_kern(): one is to increase the reference count of owner
+ * of TIPC socket's proto_ops struct; another is to increment the
+ * reference count of owner of TIPC proto struct. Therefore, we must
+ * decrement the module reference count twice to ensure that it keeps
+ * zero after server's listening socket is created. Of course, we
+ * must bump the module reference count twice as well before the socket
+ * is closed.
+ */
+ module_put(lsock->ops->owner);
+ module_put(sk->sk_prot_creator->owner);
+
+ return 0;
+err:
+ sock_release(lsock);
+ return -EINVAL;
+}
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid)
+{
+ struct tipc_subscr sub;
+ struct tipc_conn *con;
+ int rc;
+
+ sub.seq.type = type;
+ sub.seq.lower = lower;
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = filter;
+ *(u32 *)&sub.usr_handle = port;
+
+ con = tipc_conn_alloc(tipc_topsrv(net));
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+ con->sock = NULL;
+ rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
+ if (rc >= 0)
+ return true;
+ conn_put(con);
+ return false;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(tipc_topsrv(net), conid);
+ if (!con)
+ return;
+
+ test_and_clear_bit(CF_CONNECTED, &con->flags);
+ tipc_conn_delete_sub(con, NULL);
+ conn_put(con);
+ conn_put(con);
+}
+
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
+{
+ u32 port = *(u32 *)&evt->s.usr_handle;
+ u32 self = tipc_own_addr(net);
+ struct sk_buff_head evtq;
+ struct sk_buff *skb;
+
+ skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+ self, self, port, port, 0);
+ if (!skb)
+ return;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+ skb_queue_head_init(&evtq);
+ __skb_queue_tail(&evtq, skb);
+ tipc_sk_rcv(net, &evtq);
+}
+
+static int tipc_topsrv_work_start(struct tipc_topsrv *s)
+{
+ s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
+ if (!s->rcv_wq) {
+ pr_err("can't start tipc receive workqueue\n");
+ return -ENOMEM;
+ }
+
+ s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
+ if (!s->send_wq) {
+ pr_err("can't start tipc send workqueue\n");
+ destroy_workqueue(s->rcv_wq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
+{
+ destroy_workqueue(s->rcv_wq);
+ destroy_workqueue(s->send_wq);
+}
+
+int tipc_topsrv_start(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+ const char name[] = "topology_server";
+ struct tipc_topsrv *srv;
+ int ret;
+
+ srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
+ if (!srv)
+ return -ENOMEM;
+
+ srv->net = net;
+ srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
+ INIT_WORK(&srv->awork, tipc_topsrv_accept);
+
+ strncpy(srv->name, name, strlen(name) + 1);
+ tn->topsrv = srv;
+ atomic_set(&tn->subscription_count, 0);
+
+ spin_lock_init(&srv->idr_lock);
+ idr_init(&srv->conn_idr);
+ srv->idr_in_use = 0;
+
+ ret = tipc_topsrv_work_start(srv);
+ if (ret < 0)
+ return ret;
+
+ ret = tipc_topsrv_create_listener(srv);
+ if (ret < 0)
+ tipc_topsrv_work_stop(srv);
+
+ return ret;
+}
+
+void tipc_topsrv_stop(struct net *net)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct socket *lsock = srv->listener;
+ struct tipc_conn *con;
+ int id;
+
+ spin_lock_bh(&srv->idr_lock);
+ for (id = 0; srv->idr_in_use; id++) {
+ con = idr_find(&srv->conn_idr, id);
+ if (con) {
+ spin_unlock_bh(&srv->idr_lock);
+ tipc_conn_close(con);
+ spin_lock_bh(&srv->idr_lock);
+ }
+ }
+ __module_get(lsock->ops->owner);
+ __module_get(lsock->sk->sk_prot_creator->owner);
+ srv->listener = NULL;
+ spin_unlock_bh(&srv->idr_lock);
+ sock_release(lsock);
+ tipc_topsrv_work_stop(srv);
+ idr_destroy(&srv->conn_idr);
+ kfree(srv);
+}
diff --git a/net/tipc/server.h b/net/tipc/topsrv.h
index 64df7513cd70..c7ea71293748 100644
--- a/net/tipc/server.h
+++ b/net/tipc/topsrv.h
@@ -2,6 +2,7 @@
* net/tipc/server.h: Include file for TIPC server code
*
* Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,68 +37,18 @@
#ifndef _TIPC_SERVER_H
#define _TIPC_SERVER_H
-#include <linux/idr.h>
-#include <linux/tipc.h>
-#include <net/net_namespace.h>
+#include "core.h"
#define TIPC_SERVER_NAME_LEN 32
#define TIPC_SUB_CLUSTER_SCOPE 0x20
#define TIPC_SUB_NODE_SCOPE 0x40
#define TIPC_SUB_NO_STATUS 0x80
-/**
- * struct tipc_server - TIPC server structure
- * @conn_idr: identifier set of connection
- * @idr_lock: protect the connection identifier set
- * @idr_in_use: amount of allocated identifier entry
- * @net: network namspace instance
- * @rcvbuf_cache: memory cache of server receive buffer
- * @rcv_wq: receive workqueue
- * @send_wq: send workqueue
- * @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_release: callback will be called before releasing the connection
- * @tipc_conn_recvmsg: callback will be called when message arrives
- * @saddr: TIPC server address
- * @name: server name
- * @imp: message importance
- * @type: socket type
- */
-struct tipc_server {
- struct idr conn_idr;
- spinlock_t idr_lock;
- int idr_in_use;
- struct net *net;
- struct kmem_cache *rcvbuf_cache;
- struct workqueue_struct *rcv_wq;
- struct workqueue_struct *send_wq;
- int max_rcvbuf_size;
- void *(*tipc_conn_new)(int conid);
- void (*tipc_conn_release)(int conid, void *usr_data);
- int (*tipc_conn_recvmsg)(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len);
- struct sockaddr_tipc *saddr;
- char name[TIPC_SERVER_NAME_LEN];
- int imp;
- int type;
-};
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len);
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt);
bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
u32 upper, u32 filter, int *conid);
void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
-/**
- * tipc_conn_terminate - terminate connection with server
- *
- * Note: Must call it in process context since it might sleep
- */
-void tipc_conn_terminate(struct tipc_server *s, int conid);
-int tipc_server_start(struct tipc_server *s);
-
-void tipc_server_stop(struct tipc_server *s);
-
#endif
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 3deabcab4882..e7d91f5d5cae 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -47,6 +47,8 @@
#include <net/addrconf.h>
#include <linux/tipc_netlink.h>
#include "core.h"
+#include "addr.h"
+#include "net.h"
#include "bearer.h"
#include "netlink.h"
#include "msg.h"
@@ -647,6 +649,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
struct udp_port_cfg udp_conf = {0};
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+ u8 node_id[NODE_ID_LEN] = {0,};
ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
if (!ub)
@@ -677,6 +680,17 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
if (err)
goto err;
+ /* Autoconfigure own node identity if needed */
+ if (!tipc_own_id(net)) {
+ memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16);
+ tipc_net_init(net, node_id, 0);
+ }
+ if (!tipc_own_id(net)) {
+ pr_warn("Failed to set node id, please configure manually\n");
+ err = -EINVAL;
+ goto err;
+ }
+
b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;
b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
rcu_assign_pointer(b->media_ptr, ub);
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index eb583038c67e..89b8745a986f 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -7,6 +7,7 @@ config TLS
select CRYPTO
select CRYPTO_AES
select CRYPTO_GCM
+ select STREAM_PARSER
default n
---help---
Enable kernel support for TLS protocol. This allows symmetric
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index e9b4b53ab53e..0d379970960e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -38,6 +38,7 @@
#include <linux/highmem.h>
#include <linux/netdevice.h>
#include <linux/sched/signal.h>
+#include <linux/inetdevice.h>
#include <net/tls.h>
@@ -46,16 +47,32 @@ MODULE_DESCRIPTION("Transport Layer Security Support");
MODULE_LICENSE("Dual BSD/GPL");
enum {
- TLS_BASE_TX,
+ TLSV4,
+ TLSV6,
+ TLS_NUM_PROTS,
+};
+
+enum {
+ TLS_BASE,
TLS_SW_TX,
+ TLS_SW_RX,
+ TLS_SW_RXTX,
+ TLS_HW_RECORD,
TLS_NUM_CONFIG,
};
-static struct proto tls_prots[TLS_NUM_CONFIG];
+static struct proto *saved_tcpv6_prot;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
+static LIST_HEAD(device_list);
+static DEFINE_MUTEX(device_mutex);
+static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
+static struct proto_ops tls_sw_proto_ops;
static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
{
- sk->sk_prot = &tls_prots[ctx->tx_conf];
+ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
+
+ sk->sk_prot = &tls_prots[ip_ver][ctx->conf];
}
int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -228,8 +245,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
lock_sock(sk);
sk_proto_close = ctx->sk_proto_close;
- if (ctx->tx_conf == TLS_BASE_TX) {
+ if (ctx->conf == TLS_HW_RECORD)
+ goto skip_tx_cleanup;
+
+ if (ctx->conf == TLS_BASE) {
kfree(ctx);
+ ctx = NULL;
goto skip_tx_cleanup;
}
@@ -249,15 +270,25 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
}
}
- kfree(ctx->rec_seq);
- kfree(ctx->iv);
+ kfree(ctx->tx.rec_seq);
+ kfree(ctx->tx.iv);
+ kfree(ctx->rx.rec_seq);
+ kfree(ctx->rx.iv);
- if (ctx->tx_conf == TLS_SW_TX)
- tls_sw_free_tx_resources(sk);
+ if (ctx->conf == TLS_SW_TX ||
+ ctx->conf == TLS_SW_RX ||
+ ctx->conf == TLS_SW_RXTX) {
+ tls_sw_free_resources(sk);
+ }
skip_tx_cleanup:
release_sock(sk);
sk_proto_close(sk, timeout);
+ /* free ctx for TLS_HW_RECORD, used by tcp_set_state
+ * for sk->sk_prot->unhash [tls_hw_unhash]
+ */
+ if (ctx && ctx->conf == TLS_HW_RECORD)
+ kfree(ctx);
}
static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
@@ -309,9 +340,9 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
}
lock_sock(sk);
memcpy(crypto_info_aes_gcm_128->iv,
- ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+ ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
TLS_CIPHER_AES_GCM_128_IV_SIZE);
- memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq,
+ memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->tx.rec_seq,
TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
release_sock(sk);
if (copy_to_user(optval,
@@ -355,20 +386,24 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
return do_tls_getsockopt(sk, optname, optval, optlen);
}
-static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
- unsigned int optlen)
+static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
+ unsigned int optlen, int tx)
{
struct tls_crypto_info *crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
int rc = 0;
- int tx_conf;
+ int conf;
if (!optval || (optlen < sizeof(*crypto_info))) {
rc = -EINVAL;
goto out;
}
- crypto_info = &ctx->crypto_send;
+ if (tx)
+ crypto_info = &ctx->crypto_send;
+ else
+ crypto_info = &ctx->crypto_recv;
+
/* Currently we don't support set crypto info more than one time */
if (TLS_CRYPTO_INFO_READY(crypto_info)) {
rc = -EBUSY;
@@ -407,15 +442,31 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
}
/* currently SW is default, we will have ethtool in future */
- rc = tls_set_sw_offload(sk, ctx);
- tx_conf = TLS_SW_TX;
+ if (tx) {
+ rc = tls_set_sw_offload(sk, ctx, 1);
+ if (ctx->conf == TLS_SW_RX)
+ conf = TLS_SW_RXTX;
+ else
+ conf = TLS_SW_TX;
+ } else {
+ rc = tls_set_sw_offload(sk, ctx, 0);
+ if (ctx->conf == TLS_SW_TX)
+ conf = TLS_SW_RXTX;
+ else
+ conf = TLS_SW_RX;
+ }
+
if (rc)
goto err_crypto_info;
- ctx->tx_conf = tx_conf;
+ ctx->conf = conf;
update_sk_prot(sk, ctx);
- ctx->sk_write_space = sk->sk_write_space;
- sk->sk_write_space = tls_write_space;
+ if (tx) {
+ ctx->sk_write_space = sk->sk_write_space;
+ sk->sk_write_space = tls_write_space;
+ } else {
+ sk->sk_socket->ops = &tls_sw_proto_ops;
+ }
goto out;
err_crypto_info:
@@ -431,8 +482,10 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
switch (optname) {
case TLS_TX:
+ case TLS_RX:
lock_sock(sk);
- rc = do_tls_setsockopt_tx(sk, optval, optlen);
+ rc = do_tls_setsockopt_conf(sk, optval, optlen,
+ optname == TLS_TX);
release_sock(sk);
break;
default:
@@ -453,12 +506,113 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
return do_tls_setsockopt(sk, optname, optval, optlen);
}
-static int tls_init(struct sock *sk)
+static struct tls_context *create_ctx(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tls_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+
+ icsk->icsk_ulp_data = ctx;
+ return ctx;
+}
+
+static int tls_hw_prot(struct sock *sk)
+{
+ struct tls_context *ctx;
+ struct tls_device *dev;
+ int rc = 0;
+
+ mutex_lock(&device_mutex);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->feature && dev->feature(dev)) {
+ ctx = create_ctx(sk);
+ if (!ctx)
+ goto out;
+
+ ctx->hash = sk->sk_prot->hash;
+ ctx->unhash = sk->sk_prot->unhash;
+ ctx->sk_proto_close = sk->sk_prot->close;
+ ctx->conf = TLS_HW_RECORD;
+ update_sk_prot(sk, ctx);
+ rc = 1;
+ break;
+ }
+ }
+out:
+ mutex_unlock(&device_mutex);
+ return rc;
+}
+
+static void tls_hw_unhash(struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ struct tls_device *dev;
+
+ mutex_lock(&device_mutex);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->unhash)
+ dev->unhash(dev, sk);
+ }
+ mutex_unlock(&device_mutex);
+ ctx->unhash(sk);
+}
+
+static int tls_hw_hash(struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ struct tls_device *dev;
+ int err;
+
+ err = ctx->hash(sk);
+ mutex_lock(&device_mutex);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->hash)
+ err |= dev->hash(dev, sk);
+ }
+ mutex_unlock(&device_mutex);
+
+ if (err)
+ tls_hw_unhash(sk);
+ return err;
+}
+
+static void build_protos(struct proto *prot, struct proto *base)
+{
+ prot[TLS_BASE] = *base;
+ prot[TLS_BASE].setsockopt = tls_setsockopt;
+ prot[TLS_BASE].getsockopt = tls_getsockopt;
+ prot[TLS_BASE].close = tls_sk_proto_close;
+
+ prot[TLS_SW_TX] = prot[TLS_BASE];
+ prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
+ prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
+
+ prot[TLS_SW_RX] = prot[TLS_BASE];
+ prot[TLS_SW_RX].recvmsg = tls_sw_recvmsg;
+ prot[TLS_SW_RX].close = tls_sk_proto_close;
+
+ prot[TLS_SW_RXTX] = prot[TLS_SW_TX];
+ prot[TLS_SW_RXTX].recvmsg = tls_sw_recvmsg;
+ prot[TLS_SW_RXTX].close = tls_sk_proto_close;
+
+ prot[TLS_HW_RECORD] = *base;
+ prot[TLS_HW_RECORD].hash = tls_hw_hash;
+ prot[TLS_HW_RECORD].unhash = tls_hw_unhash;
+ prot[TLS_HW_RECORD].close = tls_sk_proto_close;
+}
+
+static int tls_init(struct sock *sk)
+{
+ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
+ struct tls_context *ctx;
int rc = 0;
+ if (tls_hw_prot(sk))
+ goto out;
+
/* The TLS ulp is currently supported only for TCP sockets
* in ESTABLISHED state.
* Supporting sockets in LISTEN state will require us
@@ -469,22 +623,48 @@ static int tls_init(struct sock *sk)
return -ENOTSUPP;
/* allocate tls context */
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = create_ctx(sk);
if (!ctx) {
rc = -ENOMEM;
goto out;
}
- icsk->icsk_ulp_data = ctx;
ctx->setsockopt = sk->sk_prot->setsockopt;
ctx->getsockopt = sk->sk_prot->getsockopt;
ctx->sk_proto_close = sk->sk_prot->close;
- ctx->tx_conf = TLS_BASE_TX;
+ /* Build IPv6 TLS whenever the address of tcpv6_prot changes */
+ if (ip_ver == TLSV6 &&
+ unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+ mutex_lock(&tcpv6_prot_mutex);
+ if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+ build_protos(tls_prots[TLSV6], sk->sk_prot);
+ smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+ }
+ mutex_unlock(&tcpv6_prot_mutex);
+ }
+
+ ctx->conf = TLS_BASE;
update_sk_prot(sk, ctx);
out:
return rc;
}
+void tls_register_device(struct tls_device *device)
+{
+ mutex_lock(&device_mutex);
+ list_add_tail(&device->dev_list, &device_list);
+ mutex_unlock(&device_mutex);
+}
+EXPORT_SYMBOL(tls_register_device);
+
+void tls_unregister_device(struct tls_device *device)
+{
+ mutex_lock(&device_mutex);
+ list_del(&device->dev_list);
+ mutex_unlock(&device_mutex);
+}
+EXPORT_SYMBOL(tls_unregister_device);
+
static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.name = "tls",
.uid = TCP_ULP_TLS,
@@ -493,21 +673,13 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.init = tls_init,
};
-static void build_protos(struct proto *prot, struct proto *base)
-{
- prot[TLS_BASE_TX] = *base;
- prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
- prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
- prot[TLS_BASE_TX].close = tls_sk_proto_close;
-
- prot[TLS_SW_TX] = prot[TLS_BASE_TX];
- prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
- prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
-}
-
static int __init tls_register(void)
{
- build_protos(tls_prots, &tcp_prot);
+ build_protos(tls_prots[TLSV4], &tcp_prot);
+
+ tls_sw_proto_ops = inet_stream_ops;
+ tls_sw_proto_ops.poll = tls_sw_poll;
+ tls_sw_proto_ops.splice_read = tls_sw_splice_read;
tcp_register_ulp(&tcp_tls_ulp_ops);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index f26376e954ae..4dc766b03f00 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -34,11 +34,60 @@
* SOFTWARE.
*/
+#include <linux/sched/signal.h>
#include <linux/module.h>
#include <crypto/aead.h>
+#include <net/strparser.h>
#include <net/tls.h>
+static int tls_do_decryption(struct sock *sk,
+ struct scatterlist *sgin,
+ struct scatterlist *sgout,
+ char *iv_recv,
+ size_t data_len,
+ struct sk_buff *skb,
+ gfp_t flags)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct strp_msg *rxm = strp_msg(skb);
+ struct aead_request *aead_req;
+
+ int ret;
+ unsigned int req_size = sizeof(struct aead_request) +
+ crypto_aead_reqsize(ctx->aead_recv);
+
+ aead_req = kzalloc(req_size, flags);
+ if (!aead_req)
+ return -ENOMEM;
+
+ aead_request_set_tfm(aead_req, ctx->aead_recv);
+ aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+ aead_request_set_crypt(aead_req, sgin, sgout,
+ data_len + tls_ctx->rx.tag_size,
+ (u8 *)iv_recv);
+ aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &ctx->async_wait);
+
+ ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
+
+ if (ret < 0)
+ goto out;
+
+ rxm->offset += tls_ctx->rx.prepend_size;
+ rxm->full_len -= tls_ctx->rx.overhead_size;
+ tls_advance_record_sn(sk, &tls_ctx->rx);
+
+ ctx->decrypted = true;
+
+ ctx->saved_data_ready(sk);
+
+out:
+ kfree(aead_req);
+ return ret;
+}
+
static void trim_sg(struct sock *sk, struct scatterlist *sg,
int *sg_num_elem, unsigned int *sg_size, int target_size)
{
@@ -79,7 +128,7 @@ static void trim_both_sgl(struct sock *sk, int target_size)
target_size);
if (target_size > 0)
- target_size += tls_ctx->overhead_size;
+ target_size += tls_ctx->tx.overhead_size;
trim_sg(sk, ctx->sg_encrypted_data,
&ctx->sg_encrypted_num_elem,
@@ -87,71 +136,16 @@ static void trim_both_sgl(struct sock *sk, int target_size)
target_size);
}
-static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
- int *sg_num_elem, unsigned int *sg_size,
- int first_coalesce)
-{
- struct page_frag *pfrag;
- unsigned int size = *sg_size;
- int num_elem = *sg_num_elem, use = 0, rc = 0;
- struct scatterlist *sge;
- unsigned int orig_offset;
-
- len -= size;
- pfrag = sk_page_frag(sk);
-
- while (len > 0) {
- if (!sk_page_frag_refill(sk, pfrag)) {
- rc = -ENOMEM;
- goto out;
- }
-
- use = min_t(int, len, pfrag->size - pfrag->offset);
-
- if (!sk_wmem_schedule(sk, use)) {
- rc = -ENOMEM;
- goto out;
- }
-
- sk_mem_charge(sk, use);
- size += use;
- orig_offset = pfrag->offset;
- pfrag->offset += use;
-
- sge = sg + num_elem - 1;
- if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
- sg->offset + sg->length == orig_offset) {
- sg->length += use;
- } else {
- sge++;
- sg_unmark_end(sge);
- sg_set_page(sge, pfrag->page, use, orig_offset);
- get_page(pfrag->page);
- ++num_elem;
- if (num_elem == MAX_SKB_FRAGS) {
- rc = -ENOSPC;
- break;
- }
- }
-
- len -= use;
- }
- goto out;
-
-out:
- *sg_size = size;
- *sg_num_elem = num_elem;
- return rc;
-}
-
static int alloc_encrypted_sg(struct sock *sk, int len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
int rc = 0;
- rc = alloc_sg(sk, len, ctx->sg_encrypted_data,
- &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0);
+ rc = sk_alloc_sg(sk, len,
+ ctx->sg_encrypted_data, 0,
+ &ctx->sg_encrypted_num_elem,
+ &ctx->sg_encrypted_size, 0);
return rc;
}
@@ -162,9 +156,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len)
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
int rc = 0;
- rc = alloc_sg(sk, len, ctx->sg_plaintext_data,
- &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
- tls_ctx->pending_open_record_frags);
+ rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
+ &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
+ tls_ctx->pending_open_record_frags);
return rc;
}
@@ -207,21 +201,21 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
if (!aead_req)
return -ENOMEM;
- ctx->sg_encrypted_data[0].offset += tls_ctx->prepend_size;
- ctx->sg_encrypted_data[0].length -= tls_ctx->prepend_size;
+ ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size;
+ ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size;
aead_request_set_tfm(aead_req, ctx->aead_send);
aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
- data_len, tls_ctx->iv);
+ data_len, tls_ctx->tx.iv);
aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &ctx->async_wait);
rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait);
- ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size;
- ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size;
+ ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size;
+ ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size;
kfree(aead_req);
return rc;
@@ -238,7 +232,7 @@ static int tls_push_record(struct sock *sk, int flags,
sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
- tls_ctx->rec_seq, tls_ctx->rec_seq_size,
+ tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
record_type);
tls_fill_prepend(tls_ctx,
@@ -269,9 +263,9 @@ static int tls_push_record(struct sock *sk, int flags,
/* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */
rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags);
if (rc < 0 && rc != -EAGAIN)
- tls_err_abort(sk);
+ tls_err_abort(sk, EBADMSG);
- tls_advance_record_sn(sk, tls_ctx);
+ tls_advance_record_sn(sk, &tls_ctx->tx);
return rc;
}
@@ -281,23 +275,24 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags)
}
static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
- int length)
+ int length, int *pages_used,
+ unsigned int *size_used,
+ struct scatterlist *to, int to_max_pages,
+ bool charge)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
struct page *pages[MAX_SKB_FRAGS];
size_t offset;
ssize_t copied, use;
int i = 0;
- unsigned int size = ctx->sg_plaintext_size;
- int num_elem = ctx->sg_plaintext_num_elem;
+ unsigned int size = *size_used;
+ int num_elem = *pages_used;
int rc = 0;
int maxpages;
while (length > 0) {
i = 0;
- maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem;
+ maxpages = to_max_pages - num_elem;
if (maxpages == 0) {
rc = -EFAULT;
goto out;
@@ -317,10 +312,11 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
while (copied) {
use = min_t(int, copied, PAGE_SIZE - offset);
- sg_set_page(&ctx->sg_plaintext_data[num_elem],
+ sg_set_page(&to[num_elem],
pages[i], use, offset);
- sg_unmark_end(&ctx->sg_plaintext_data[num_elem]);
- sk_mem_charge(sk, use);
+ sg_unmark_end(&to[num_elem]);
+ if (charge)
+ sk_mem_charge(sk, use);
offset = 0;
copied -= use;
@@ -331,8 +327,9 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
}
out:
- ctx->sg_plaintext_size = size;
- ctx->sg_plaintext_num_elem = num_elem;
+ *size_used = size;
+ *pages_used = num_elem;
+
return rc;
}
@@ -409,7 +406,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
required_size = ctx->sg_plaintext_size + try_to_copy +
- tls_ctx->overhead_size;
+ tls_ctx->tx.overhead_size;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -429,7 +426,11 @@ alloc_encrypted:
if (full_record || eor) {
ret = zerocopy_from_iter(sk, &msg->msg_iter,
- try_to_copy);
+ try_to_copy, &ctx->sg_plaintext_num_elem,
+ &ctx->sg_plaintext_size,
+ ctx->sg_plaintext_data,
+ ARRAY_SIZE(ctx->sg_plaintext_data),
+ true);
if (ret)
goto fallback_to_reg_send;
@@ -468,7 +469,7 @@ alloc_plaintext:
&ctx->sg_encrypted_num_elem,
&ctx->sg_encrypted_size,
ctx->sg_plaintext_size +
- tls_ctx->overhead_size);
+ tls_ctx->tx.overhead_size);
}
ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
@@ -560,7 +561,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
full_record = true;
}
required_size = ctx->sg_plaintext_size + copy +
- tls_ctx->overhead_size;
+ tls_ctx->tx.overhead_size;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -629,13 +630,404 @@ sendpage_end:
return ret;
}
-void tls_sw_free_tx_resources(struct sock *sk)
+static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
+ long timeo, int *err)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct sk_buff *skb;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+ while (!(skb = ctx->recv_pkt)) {
+ if (sk->sk_err) {
+ *err = sock_error(sk);
+ return NULL;
+ }
+
+ if (sock_flag(sk, SOCK_DONE))
+ return NULL;
+
+ if ((flags & MSG_DONTWAIT) || !timeo) {
+ *err = -EAGAIN;
+ return NULL;
+ }
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+
+ /* Handle signals */
+ if (signal_pending(current)) {
+ *err = sock_intr_errno(timeo);
+ return NULL;
+ }
+ }
+
+ return skb;
+}
+
+static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+ struct scatterlist *sgout)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + tls_ctx->rx.iv_size];
+ struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
+ struct scatterlist *sgin = &sgin_arr[0];
+ struct strp_msg *rxm = strp_msg(skb);
+ int ret, nsg = ARRAY_SIZE(sgin_arr);
+ char aad_recv[TLS_AAD_SPACE_SIZE];
+ struct sk_buff *unused;
+
+ ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+ iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+ tls_ctx->rx.iv_size);
+ if (ret < 0)
+ return ret;
+
+ memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ if (!sgout) {
+ nsg = skb_cow_data(skb, 0, &unused) + 1;
+ sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation);
+ if (!sgout)
+ sgout = sgin;
+ }
+
+ sg_init_table(sgin, nsg);
+ sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv));
+
+ nsg = skb_to_sgvec(skb, &sgin[1],
+ rxm->offset + tls_ctx->rx.prepend_size,
+ rxm->full_len - tls_ctx->rx.prepend_size);
+
+ tls_make_aad(aad_recv,
+ rxm->full_len - tls_ctx->rx.overhead_size,
+ tls_ctx->rx.rec_seq,
+ tls_ctx->rx.rec_seq_size,
+ ctx->control);
+
+ ret = tls_do_decryption(sk, sgin, sgout, iv,
+ rxm->full_len - tls_ctx->rx.overhead_size,
+ skb, sk->sk_allocation);
+
+ if (sgin != &sgin_arr[0])
+ kfree(sgin);
+
+ return ret;
+}
+
+static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
+ unsigned int len)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct strp_msg *rxm = strp_msg(skb);
+
+ if (len < rxm->full_len) {
+ rxm->offset += len;
+ rxm->full_len -= len;
+
+ return false;
+ }
+
+ /* Finished with message */
+ ctx->recv_pkt = NULL;
+ kfree_skb(skb);
+ strp_unpause(&ctx->strp);
+
+ return true;
+}
+
+int tls_sw_recvmsg(struct sock *sk,
+ struct msghdr *msg,
+ size_t len,
+ int nonblock,
+ int flags,
+ int *addr_len)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ unsigned char control;
+ struct strp_msg *rxm;
+ struct sk_buff *skb;
+ ssize_t copied = 0;
+ bool cmsg = false;
+ int err = 0;
+ long timeo;
+
+ flags |= nonblock;
+
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ do {
+ bool zc = false;
+ int chunk = 0;
+
+ skb = tls_wait_data(sk, flags, timeo, &err);
+ if (!skb)
+ goto recv_end;
+
+ rxm = strp_msg(skb);
+ if (!cmsg) {
+ int cerr;
+
+ cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+ sizeof(ctx->control), &ctx->control);
+ cmsg = true;
+ control = ctx->control;
+ if (ctx->control != TLS_RECORD_TYPE_DATA) {
+ if (cerr || msg->msg_flags & MSG_CTRUNC) {
+ err = -EIO;
+ goto recv_end;
+ }
+ }
+ } else if (control != ctx->control) {
+ goto recv_end;
+ }
+
+ if (!ctx->decrypted) {
+ int page_count;
+ int to_copy;
+
+ page_count = iov_iter_npages(&msg->msg_iter,
+ MAX_SKB_FRAGS);
+ to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
+ if (to_copy <= len && page_count < MAX_SKB_FRAGS &&
+ likely(!(flags & MSG_PEEK))) {
+ struct scatterlist sgin[MAX_SKB_FRAGS + 1];
+ char unused[21];
+ int pages = 0;
+
+ zc = true;
+ sg_init_table(sgin, MAX_SKB_FRAGS + 1);
+ sg_set_buf(&sgin[0], unused, 13);
+
+ err = zerocopy_from_iter(sk, &msg->msg_iter,
+ to_copy, &pages,
+ &chunk, &sgin[1],
+ MAX_SKB_FRAGS, false);
+ if (err < 0)
+ goto fallback_to_reg_recv;
+
+ err = decrypt_skb(sk, skb, sgin);
+ for (; pages > 0; pages--)
+ put_page(sg_page(&sgin[pages]));
+ if (err < 0) {
+ tls_err_abort(sk, EBADMSG);
+ goto recv_end;
+ }
+ } else {
+fallback_to_reg_recv:
+ err = decrypt_skb(sk, skb, NULL);
+ if (err < 0) {
+ tls_err_abort(sk, EBADMSG);
+ goto recv_end;
+ }
+ }
+ ctx->decrypted = true;
+ }
+
+ if (!zc) {
+ chunk = min_t(unsigned int, rxm->full_len, len);
+ err = skb_copy_datagram_msg(skb, rxm->offset, msg,
+ chunk);
+ if (err < 0)
+ goto recv_end;
+ }
+
+ copied += chunk;
+ len -= chunk;
+ if (likely(!(flags & MSG_PEEK))) {
+ u8 control = ctx->control;
+
+ if (tls_sw_advance_skb(sk, skb, chunk)) {
+ /* Return full control message to
+ * userspace before trying to parse
+ * another message type
+ */
+ msg->msg_flags |= MSG_EOR;
+ if (control != TLS_RECORD_TYPE_DATA)
+ goto recv_end;
+ }
+ }
+ } while (len);
+
+recv_end:
+ release_sock(sk);
+ return copied ? : err;
+}
+
+ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sock->sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct strp_msg *rxm = NULL;
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ ssize_t copied = 0;
+ int err = 0;
+ long timeo;
+ int chunk;
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ skb = tls_wait_data(sk, flags, timeo, &err);
+ if (!skb)
+ goto splice_read_end;
+
+ /* splice does not support reading control messages */
+ if (ctx->control != TLS_RECORD_TYPE_DATA) {
+ err = -ENOTSUPP;
+ goto splice_read_end;
+ }
+
+ if (!ctx->decrypted) {
+ err = decrypt_skb(sk, skb, NULL);
+
+ if (err < 0) {
+ tls_err_abort(sk, EBADMSG);
+ goto splice_read_end;
+ }
+ ctx->decrypted = true;
+ }
+ rxm = strp_msg(skb);
+
+ chunk = min_t(unsigned int, rxm->full_len, len);
+ copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
+ if (copied < 0)
+ goto splice_read_end;
+
+ if (likely(!(flags & MSG_PEEK)))
+ tls_sw_advance_skb(sk, skb, copied);
+
+splice_read_end:
+ release_sock(sk);
+ return copied ? : err;
+}
+
+unsigned int tls_sw_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait)
+{
+ unsigned int ret;
+ struct sock *sk = sock->sk;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+ /* Grab POLLOUT and POLLHUP from the underlying socket */
+ ret = ctx->sk_poll(file, sock, wait);
+
+ /* Clear POLLIN bits, and set based on recv_pkt */
+ ret &= ~(POLLIN | POLLRDNORM);
+ if (ctx->recv_pkt)
+ ret |= POLLIN | POLLRDNORM;
+
+ return ret;
+}
+
+static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ char header[tls_ctx->rx.prepend_size];
+ struct strp_msg *rxm = strp_msg(skb);
+ size_t cipher_overhead;
+ size_t data_len = 0;
+ int ret;
+
+ /* Verify that we have a full TLS header, or wait for more data */
+ if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
+ return 0;
+
+ /* Linearize header to local buffer */
+ ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
+
+ if (ret < 0)
+ goto read_failure;
+
+ ctx->control = header[0];
+
+ data_len = ((header[4] & 0xFF) | (header[3] << 8));
+
+ cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size;
+
+ if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) {
+ ret = -EMSGSIZE;
+ goto read_failure;
+ }
+ if (data_len < cipher_overhead) {
+ ret = -EBADMSG;
+ goto read_failure;
+ }
+
+ if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.version) ||
+ header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.version)) {
+ ret = -EINVAL;
+ goto read_failure;
+ }
+
+ return data_len + TLS_HEADER_SIZE;
+
+read_failure:
+ tls_err_abort(strp->sk, ret);
+
+ return ret;
+}
+
+static void tls_queue(struct strparser *strp, struct sk_buff *skb)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct strp_msg *rxm;
+
+ rxm = strp_msg(skb);
+
+ ctx->decrypted = false;
+
+ ctx->recv_pkt = skb;
+ strp_pause(strp);
+
+ strp->sk->sk_state_change(strp->sk);
+}
+
+static void tls_data_ready(struct sock *sk)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+ strp_data_ready(&ctx->strp);
+}
+
+void tls_sw_free_resources(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
if (ctx->aead_send)
crypto_free_aead(ctx->aead_send);
+ if (ctx->aead_recv) {
+ if (ctx->recv_pkt) {
+ kfree_skb(ctx->recv_pkt);
+ ctx->recv_pkt = NULL;
+ }
+ crypto_free_aead(ctx->aead_recv);
+ strp_stop(&ctx->strp);
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = ctx->saved_data_ready;
+ write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
+ strp_done(&ctx->strp);
+ lock_sock(sk);
+ }
tls_free_both_sg(sk);
@@ -643,12 +1035,15 @@ void tls_sw_free_tx_resources(struct sock *sk)
kfree(tls_ctx);
}
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
struct tls_crypto_info *crypto_info;
struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
struct tls_sw_context *sw_ctx;
+ struct cipher_context *cctx;
+ struct crypto_aead **aead;
+ struct strp_callbacks cb;
u16 nonce_size, tag_size, iv_size, rec_seq_size;
char *iv, *rec_seq;
int rc = 0;
@@ -658,22 +1053,29 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
goto out;
}
- if (ctx->priv_ctx) {
- rc = -EEXIST;
- goto out;
- }
-
- sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
- if (!sw_ctx) {
- rc = -ENOMEM;
- goto out;
+ if (!ctx->priv_ctx) {
+ sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
+ if (!sw_ctx) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ crypto_init_wait(&sw_ctx->async_wait);
+ } else {
+ sw_ctx = ctx->priv_ctx;
}
- crypto_init_wait(&sw_ctx->async_wait);
-
ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
- crypto_info = &ctx->crypto_send;
+ if (tx) {
+ crypto_info = &ctx->crypto_send;
+ cctx = &ctx->tx;
+ aead = &sw_ctx->aead_send;
+ } else {
+ crypto_info = &ctx->crypto_recv;
+ cctx = &ctx->rx;
+ aead = &sw_ctx->aead_recv;
+ }
+
switch (crypto_info->cipher_type) {
case TLS_CIPHER_AES_GCM_128: {
nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
@@ -692,46 +1094,49 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
goto free_priv;
}
- ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
- ctx->tag_size = tag_size;
- ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
- ctx->iv_size = iv_size;
- ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
- if (!ctx->iv) {
+ cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
+ cctx->tag_size = tag_size;
+ cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
+ cctx->iv_size = iv_size;
+ cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+ GFP_KERNEL);
+ if (!cctx->iv) {
rc = -ENOMEM;
goto free_priv;
}
- memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
- memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
- ctx->rec_seq_size = rec_seq_size;
- ctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
- if (!ctx->rec_seq) {
+ memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+ cctx->rec_seq_size = rec_seq_size;
+ cctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+ if (!cctx->rec_seq) {
rc = -ENOMEM;
goto free_iv;
}
- memcpy(ctx->rec_seq, rec_seq, rec_seq_size);
-
- sg_init_table(sw_ctx->sg_encrypted_data,
- ARRAY_SIZE(sw_ctx->sg_encrypted_data));
- sg_init_table(sw_ctx->sg_plaintext_data,
- ARRAY_SIZE(sw_ctx->sg_plaintext_data));
-
- sg_init_table(sw_ctx->sg_aead_in, 2);
- sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
- sizeof(sw_ctx->aad_space));
- sg_unmark_end(&sw_ctx->sg_aead_in[1]);
- sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
- sg_init_table(sw_ctx->sg_aead_out, 2);
- sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
- sizeof(sw_ctx->aad_space));
- sg_unmark_end(&sw_ctx->sg_aead_out[1]);
- sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
-
- if (!sw_ctx->aead_send) {
- sw_ctx->aead_send = crypto_alloc_aead("gcm(aes)", 0, 0);
- if (IS_ERR(sw_ctx->aead_send)) {
- rc = PTR_ERR(sw_ctx->aead_send);
- sw_ctx->aead_send = NULL;
+ memcpy(cctx->rec_seq, rec_seq, rec_seq_size);
+
+ if (tx) {
+ sg_init_table(sw_ctx->sg_encrypted_data,
+ ARRAY_SIZE(sw_ctx->sg_encrypted_data));
+ sg_init_table(sw_ctx->sg_plaintext_data,
+ ARRAY_SIZE(sw_ctx->sg_plaintext_data));
+
+ sg_init_table(sw_ctx->sg_aead_in, 2);
+ sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
+ sizeof(sw_ctx->aad_space));
+ sg_unmark_end(&sw_ctx->sg_aead_in[1]);
+ sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
+ sg_init_table(sw_ctx->sg_aead_out, 2);
+ sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
+ sizeof(sw_ctx->aad_space));
+ sg_unmark_end(&sw_ctx->sg_aead_out[1]);
+ sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
+ }
+
+ if (!*aead) {
+ *aead = crypto_alloc_aead("gcm(aes)", 0, 0);
+ if (IS_ERR(*aead)) {
+ rc = PTR_ERR(*aead);
+ *aead = NULL;
goto free_rec_seq;
}
}
@@ -740,24 +1145,44 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
- rc = crypto_aead_setkey(sw_ctx->aead_send, keyval,
+ rc = crypto_aead_setkey(*aead, keyval,
TLS_CIPHER_AES_GCM_128_KEY_SIZE);
if (rc)
goto free_aead;
- rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
- if (!rc)
- return 0;
+ rc = crypto_aead_setauthsize(*aead, cctx->tag_size);
+ if (rc)
+ goto free_aead;
+
+ if (!tx) {
+ /* Set up strparser */
+ memset(&cb, 0, sizeof(cb));
+ cb.rcv_msg = tls_queue;
+ cb.parse_msg = tls_read_size;
+
+ strp_init(&sw_ctx->strp, sk, &cb);
+
+ write_lock_bh(&sk->sk_callback_lock);
+ sw_ctx->saved_data_ready = sk->sk_data_ready;
+ sk->sk_data_ready = tls_data_ready;
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ sw_ctx->sk_poll = sk->sk_socket->ops->poll;
+
+ strp_check_rcv(&sw_ctx->strp);
+ }
+
+ goto out;
free_aead:
- crypto_free_aead(sw_ctx->aead_send);
- sw_ctx->aead_send = NULL;
+ crypto_free_aead(*aead);
+ *aead = NULL;
free_rec_seq:
- kfree(ctx->rec_seq);
- ctx->rec_seq = NULL;
+ kfree(cctx->rec_seq);
+ cctx->rec_seq = NULL;
free_iv:
- kfree(ctx->iv);
- ctx->iv = NULL;
+ kfree(ctx->tx.iv);
+ ctx->tx.iv = NULL;
free_priv:
kfree(ctx->priv_ctx);
ctx->priv_ctx = NULL;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2d465bdeccbc..68bb70a62afe 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -637,7 +637,7 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
-static int unix_getname(struct socket *, struct sockaddr *, int *, int);
+static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
poll_table *);
@@ -745,14 +745,6 @@ static struct proto unix_proto = {
.obj_size = sizeof(struct unix_sock),
};
-/*
- * AF_UNIX sockets do not interact with hardware, hence they
- * dont trigger interrupts - so it's safe for them to have
- * bh-unsafe locking for their sk_receive_queue.lock. Split off
- * this special lock-class by reinitializing the spinlock key:
- */
-static struct lock_class_key af_unix_sk_receive_queue_lock_key;
-
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
struct sock *sk = NULL;
@@ -767,8 +759,6 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
goto out;
sock_init_data(sock, sk);
- lockdep_set_class(&sk->sk_receive_queue.lock,
- &af_unix_sk_receive_queue_lock_key);
sk->sk_allocation = GFP_KERNEL_ACCOUNT;
sk->sk_write_space = unix_write_space;
@@ -1453,7 +1443,7 @@ out:
}
-static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
+static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
struct sock *sk = sock->sk;
struct unix_sock *u;
@@ -1476,12 +1466,12 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
if (!u->addr) {
sunaddr->sun_family = AF_UNIX;
sunaddr->sun_path[0] = 0;
- *uaddr_len = sizeof(short);
+ err = sizeof(short);
} else {
struct unix_address *addr = u->addr;
- *uaddr_len = addr->len;
- memcpy(sunaddr, addr->name, *uaddr_len);
+ err = addr->len;
+ memcpy(sunaddr, addr->name, addr->len);
}
unix_state_unlock(sk);
sock_put(sk);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index e0fc84daed94..aac9b8f6552e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -759,7 +759,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
static int vsock_getname(struct socket *sock,
- struct sockaddr *addr, int *addr_len, int peer)
+ struct sockaddr *addr, int peer)
{
int err;
struct sock *sk;
@@ -794,7 +794,7 @@ static int vsock_getname(struct socket *sock,
*/
BUILD_BUG_ON(sizeof(*vm_addr) > 128);
memcpy(addr, vm_addr, sizeof(*vm_addr));
- *addr_len = sizeof(*vm_addr);
+ err = sizeof(*vm_addr);
out:
release_sock(sk);
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 1abcc4fc4df1..41722046b937 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -34,9 +34,10 @@ config CFG80211
When built as a module it will be called cfg80211.
+if CFG80211
+
config NL80211_TESTMODE
bool "nl80211 testmode command"
- depends on CFG80211
help
The nl80211 testmode command helps implementing things like
factory calibration or validation tools for wireless chips.
@@ -51,7 +52,6 @@ config NL80211_TESTMODE
config CFG80211_DEVELOPER_WARNINGS
bool "enable developer warnings"
- depends on CFG80211
default n
help
This option enables some additional warnings that help
@@ -68,7 +68,7 @@ config CFG80211_DEVELOPER_WARNINGS
config CFG80211_CERTIFICATION_ONUS
bool "cfg80211 certification onus"
- depends on CFG80211 && EXPERT
+ depends on EXPERT
default n
---help---
You should disable this option unless you are both capable
@@ -159,7 +159,6 @@ config CFG80211_REG_RELAX_NO_IR
config CFG80211_DEFAULT_PS
bool "enable powersave by default"
- depends on CFG80211
default y
help
This option enables powersave mode by default.
@@ -170,7 +169,6 @@ config CFG80211_DEFAULT_PS
config CFG80211_DEBUGFS
bool "cfg80211 DebugFS entries"
- depends on CFG80211
depends on DEBUG_FS
---help---
You can enable this if you want debugfs entries for cfg80211.
@@ -180,7 +178,6 @@ config CFG80211_DEBUGFS
config CFG80211_CRDA_SUPPORT
bool "support CRDA" if EXPERT
default y
- depends on CFG80211
help
You should enable this option unless you know for sure you have no
need for it, for example when using internal regdb (above) or the
@@ -190,7 +187,6 @@ config CFG80211_CRDA_SUPPORT
config CFG80211_WEXT
bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT
- depends on CFG80211
select WEXT_CORE
default y if CFG80211_WEXT_EXPORT
help
@@ -199,11 +195,12 @@ config CFG80211_WEXT
config CFG80211_WEXT_EXPORT
bool
- depends on CFG80211
help
Drivers should select this option if they require cfg80211's
wext compatibility symbols to be exported.
+endif # CFG80211
+
config LIB80211
tristate
default n
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 63682176c96c..882d97bdc6bf 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -27,6 +27,7 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
err = rdev_stop_ap(rdev, dev);
if (!err) {
+ wdev->conn_owner_nlportid = 0;
wdev->beacon_interval = 0;
memset(&wdev->chandef, 0, sizeof(wdev->chandef));
wdev->ssid_len = 0;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index a48859982a32..2db713d18f71 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -579,6 +579,10 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy,
{
struct ieee80211_channel *c;
u32 freq, start_freq, end_freq;
+ bool dfs_offload;
+
+ dfs_offload = wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_DFS_OFFLOAD);
start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
@@ -596,8 +600,9 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy,
if (c->flags & IEEE80211_CHAN_DISABLED)
return false;
- if ((c->flags & IEEE80211_CHAN_RADAR) &&
- (c->dfs_state != NL80211_DFS_AVAILABLE))
+ if ((c->flags & IEEE80211_CHAN_RADAR) &&
+ (c->dfs_state != NL80211_DFS_AVAILABLE) &&
+ !(c->dfs_state == NL80211_DFS_USABLE && dfs_offload))
return false;
}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index eaff636169c2..63eb1b5fdd04 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -282,10 +282,10 @@ void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
unsigned long age_secs);
/* IBSS */
-int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct cfg80211_ibss_params *params,
- struct cfg80211_cached_keys *connkeys);
+int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_ibss_params *params,
+ struct cfg80211_cached_keys *connkeys);
void cfg80211_clear_ibss(struct net_device *dev, bool nowext);
int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
struct net_device *dev, bool nowext);
@@ -303,10 +303,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct mesh_setup *setup,
const struct mesh_config *conf);
-int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct mesh_setup *setup,
- const struct mesh_config *conf);
int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev);
int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index a1d10993d08a..d1743e6abc34 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -84,14 +84,15 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
}
EXPORT_SYMBOL(cfg80211_ibss_joined);
-static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct cfg80211_ibss_params *params,
- struct cfg80211_cached_keys *connkeys)
+int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_ibss_params *params,
+ struct cfg80211_cached_keys *connkeys)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
+ ASSERT_RTNL();
ASSERT_WDEV_LOCK(wdev);
if (wdev->ssid_len)
@@ -146,23 +147,6 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
return 0;
}
-int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct cfg80211_ibss_params *params,
- struct cfg80211_cached_keys *connkeys)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
-
- ASSERT_RTNL();
-
- wdev_lock(wdev);
- err = __cfg80211_join_ibss(rdev, dev, params, connkeys);
- wdev_unlock(wdev);
-
- return err;
-}
-
static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -224,6 +208,7 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
if (err)
return err;
+ wdev->conn_owner_nlportid = 0;
__cfg80211_clear_ibss(dev, nowext);
return 0;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index b12da6ef3c12..eac5aa1419fc 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -217,21 +217,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
return err;
}
-int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct mesh_setup *setup,
- const struct mesh_config *conf)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
-
- wdev_lock(wdev);
- err = __cfg80211_join_mesh(rdev, dev, setup, conf);
- wdev_unlock(wdev);
-
- return err;
-}
-
int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
struct cfg80211_chan_def *chandef)
@@ -286,6 +271,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
err = rdev_leave_mesh(rdev, dev);
if (!err) {
+ wdev->conn_owner_nlportid = 0;
wdev->mesh_id_len = 0;
wdev->beacon_interval = 0;
memset(&wdev->chandef, 0, sizeof(wdev->chandef));
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index bbb9907bfa86..12b3edf70a7b 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -872,7 +872,7 @@ void cfg80211_cac_event(struct net_device *netdev,
trace_cfg80211_cac_event(netdev, event);
- if (WARN_ON(!wdev->cac_started))
+ if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED))
return;
if (WARN_ON(!wdev->chandef.chan))
@@ -888,14 +888,17 @@ void cfg80211_cac_event(struct net_device *netdev,
sizeof(struct cfg80211_chan_def));
queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk);
cfg80211_sched_dfs_chan_update(rdev);
- break;
+ /* fall through */
case NL80211_RADAR_CAC_ABORTED:
+ wdev->cac_started = false;
+ break;
+ case NL80211_RADAR_CAC_STARTED:
+ wdev->cac_started = true;
break;
default:
WARN_ON(1);
return;
}
- wdev->cac_started = false;
nl80211_radar_notify(rdev, chandef, event, netdev, gfp);
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9c0dcc8324b0..ff28f8feeb09 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -287,6 +287,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
[NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
+ [NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG },
[NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
@@ -421,6 +422,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
+ [NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -3923,9 +3925,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
return false;
return true;
case NL80211_CMD_CONNECT:
- /* SAE not supported yet */
- if (auth_type == NL80211_AUTHTYPE_SAE)
+ if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
+ auth_type == NL80211_AUTHTYPE_SAE)
return false;
+
/* FILS with SK PFS or PK not supported yet */
if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
auth_type == NL80211_AUTHTYPE_FILS_PK)
@@ -4132,6 +4135,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
wdev->chandef = params.chandef;
wdev->ssid_len = params.ssid_len;
memcpy(wdev->ssid, params.ssid, wdev->ssid_len);
+
+ if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
+ wdev->conn_owner_nlportid = info->snd_portid;
}
wdev_unlock(wdev);
@@ -4487,6 +4493,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
PUT_SINFO_U64(BEACON_RX, rx_beacon);
PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
+ PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
#undef PUT_SINFO
#undef PUT_SINFO_U64
@@ -5848,7 +5855,6 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
return genlmsg_reply(msg, info);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
out:
nlmsg_free(msg);
return -ENOBUFS;
@@ -6329,7 +6335,6 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
nla_put_failure_rcu:
rcu_read_unlock();
nla_put_failure:
- genlmsg_cancel(msg, hdr);
put_failure:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -6718,8 +6723,17 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+ if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+ !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_LOW_POWER) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN)))
return -EOPNOTSUPP;
if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
@@ -7541,12 +7555,13 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_chan_def chandef;
enum nl80211_dfs_regions dfs_region;
unsigned int cac_time_ms;
int err;
- dfs_region = reg_get_dfs_region(wdev->wiphy);
+ dfs_region = reg_get_dfs_region(wiphy);
if (dfs_region == NL80211_DFS_UNSET)
return -EINVAL;
@@ -7560,17 +7575,20 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
if (wdev->cac_started)
return -EBUSY;
- err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef,
- wdev->iftype);
+ err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype);
if (err < 0)
return err;
if (err == 0)
return -EINVAL;
- if (!cfg80211_chandef_dfs_usable(wdev->wiphy, &chandef))
+ if (!cfg80211_chandef_dfs_usable(wiphy, &chandef))
return -EINVAL;
+ /* CAC start is offloaded to HW and can't be started manually */
+ if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD))
+ return -EOPNOTSUPP;
+
if (!rdev->ops->start_radar_detection)
return -EOPNOTSUPP;
@@ -8194,6 +8212,22 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
return err;
}
+static int validate_pae_over_nl80211(struct cfg80211_registered_device *rdev,
+ struct genl_info *info)
+{
+ if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+ GENL_SET_ERR_MSG(info, "SOCKET_OWNER not set");
+ return -EINVAL;
+ }
+
+ if (!rdev->ops->tx_control_port ||
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
struct genl_info *info,
struct cfg80211_crypto_settings *settings,
@@ -8217,6 +8251,15 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
} else
settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);
+ if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
+ int r = validate_pae_over_nl80211(rdev, info);
+
+ if (r < 0)
+ return r;
+
+ settings->control_port_over_nl80211 = true;
+ }
+
if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
void *data;
int len, i;
@@ -8662,12 +8705,26 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
ibss.control_port =
nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT]);
+ if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
+ int r = validate_pae_over_nl80211(rdev, info);
+
+ if (r < 0)
+ return r;
+
+ ibss.control_port_over_nl80211 = true;
+ }
+
ibss.userspace_handles_dfs =
nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
- err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
+ wdev_lock(dev->ieee80211_ptr);
+ err = __cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
if (err)
kzfree(connkeys);
+ else if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
+ dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;
+ wdev_unlock(dev->ieee80211_ptr);
+
return err;
}
@@ -9155,6 +9212,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
+ if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+ GENL_SET_ERR_MSG(info,
+ "external auth requires connection ownership");
+ return -EINVAL;
+ }
+ connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT;
+ }
+
wdev_lock(dev->ieee80211_ptr);
err = cfg80211_connect(rdev, dev, &connect, connkeys,
@@ -10064,7 +10130,7 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
} else {
- /* cfg80211_join_mesh() will sort it out */
+ /* __cfg80211_join_mesh() will sort it out */
setup.chandef.chan = NULL;
}
@@ -10102,7 +10168,22 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
setup.userspace_handles_dfs =
nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
- return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
+ if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
+ int r = validate_pae_over_nl80211(rdev, info);
+
+ if (r < 0)
+ return r;
+
+ setup.control_port_over_nl80211 = true;
+ }
+
+ wdev_lock(dev->ieee80211_ptr);
+ err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg);
+ if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER])
+ dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;
+ wdev_unlock(dev->ieee80211_ptr);
+
+ return err;
}
static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)
@@ -12463,6 +12544,103 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
return ret;
}
+static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct cfg80211_external_auth_params params;
+
+ if (!rdev->ops->external_auth)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_SSID])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_BSSID])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_STATUS_CODE])
+ return -EINVAL;
+
+ memset(&params, 0, sizeof(params));
+
+ params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+ if (params.ssid.ssid_len == 0 ||
+ params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+ return -EINVAL;
+ memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]),
+ params.ssid.ssid_len);
+
+ memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
+ ETH_ALEN);
+
+ params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
+
+ return rdev_external_auth(rdev, dev, &params);
+}
+
+static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ const u8 *buf;
+ size_t len;
+ u8 *dest;
+ u16 proto;
+ bool noencrypt;
+ int err;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211))
+ return -EOPNOTSUPP;
+
+ if (!rdev->ops->tx_control_port)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_FRAME] ||
+ !info->attrs[NL80211_ATTR_MAC] ||
+ !info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
+ GENL_SET_ERR_MSG(info, "Frame, MAC or ethertype missing");
+ return -EINVAL;
+ }
+
+ wdev_lock(wdev);
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_MESH_POINT:
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ if (wdev->current_bss)
+ break;
+ err = -ENOTCONN;
+ goto out;
+ default:
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ wdev_unlock(wdev);
+
+ buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
+ len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
+ dest = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ proto = nla_get_u16(info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
+ noencrypt =
+ nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]);
+
+ return rdev_tx_control_port(rdev, dev, buf, len,
+ dest, cpu_to_be16(proto), noencrypt);
+
+ out:
+ wdev_unlock(wdev);
+ return err;
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -13358,7 +13536,22 @@ static const struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
-
+ {
+ .cmd = NL80211_CMD_EXTERNAL_AUTH,
+ .doit = nl80211_external_auth,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_CONTROL_PORT_FRAME,
+ .doit = nl80211_tx_control_port,
+ .policy = nl80211_policy,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
static struct genl_family nl80211_fam __ro_after_init = {
@@ -13672,7 +13865,6 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13720,7 +13912,6 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13808,7 +13999,6 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13884,7 +14074,6 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13924,7 +14113,6 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13954,7 +14142,6 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13991,7 +14178,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14024,7 +14210,6 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14065,7 +14250,6 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
@@ -14104,7 +14288,6 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14159,7 +14342,6 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14205,7 +14387,6 @@ static void nl80211_send_remain_on_chan_event(
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14319,7 +14500,6 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_conn_failed);
@@ -14356,7 +14536,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
return true;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
return true;
}
@@ -14440,7 +14619,6 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
return -ENOBUFS;
}
@@ -14484,11 +14662,68 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
+static int __nl80211_rx_control_port(struct net_device *dev,
+ const u8 *buf, size_t len,
+ const u8 *addr, u16 proto,
+ bool unencrypted, gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+ u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid);
+
+ if (!nlportid)
+ return -ENOENT;
+
+ msg = nlmsg_new(100 + len, gfp);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONTROL_PORT_FRAME);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return -ENOBUFS;
+ }
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+ nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+ NL80211_ATTR_PAD) ||
+ nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
+ nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) ||
+ (unencrypted && nla_put_flag(msg,
+ NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT)))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
+
+ nla_put_failure:
+ nlmsg_free(msg);
+ return -ENOBUFS;
+}
+
+bool cfg80211_rx_control_port(struct net_device *dev,
+ const u8 *buf, size_t len,
+ const u8 *addr, u16 proto, bool unencrypted)
+{
+ int ret;
+
+ trace_cfg80211_rx_control_port(dev, buf, len, addr, proto, unencrypted);
+ ret = __nl80211_rx_control_port(dev, buf, len, addr, proto,
+ unencrypted, GFP_ATOMIC);
+ trace_cfg80211_return_bool(ret == 0);
+ return ret == 0;
+}
+EXPORT_SYMBOL(cfg80211_rx_control_port);
+
static struct sk_buff *cfg80211_prepare_cqm(struct net_device *dev,
const char *mac, gfp_t gfp)
{
@@ -14693,7 +14928,6 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14751,7 +14985,6 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14804,7 +15037,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14886,12 +15118,67 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+ struct sta_opmode_info *sta_opmode,
+ gfp_t gfp)
+{
+ struct sk_buff *msg;
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ void *hdr;
+
+ if (WARN_ON(!mac))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_MLME, gfp);
+
+ return;
+
+nla_put_failure:
+ nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);
+
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
- u64 cookie, bool acked, gfp_t gfp)
+ u64 cookie, bool acked, s32 ack_signal,
+ bool is_valid_ack_signal, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -14916,7 +15203,9 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
NL80211_ATTR_PAD) ||
- (acked && nla_put_flag(msg, NL80211_ATTR_ACK)))
+ (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
+ (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL,
+ ack_signal)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -14926,7 +15215,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_probe_status);
@@ -14971,8 +15259,6 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
nla_put_failure:
spin_unlock_bh(&rdev->beacon_registrations_lock);
- if (hdr)
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_report_obss_beacon);
@@ -15188,7 +15474,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_tdls_oper_request);
@@ -15333,8 +15618,6 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
return;
nla_put_failure:
- if (hdr)
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
@@ -15369,6 +15652,47 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
nlmsg_free(msg);
}
+int cfg80211_external_auth_request(struct net_device *dev,
+ struct cfg80211_external_auth_params *params,
+ gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+
+ if (!wdev->conn_owner_nlportid)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+ nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) ||
+ nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ params->action) ||
+ nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
+ nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len,
+ params->ssid.ssid))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+ wdev->conn_owner_nlportid);
+ return 0;
+
+ nla_put_failure:
+ nlmsg_free(msg);
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL(cfg80211_external_auth_request);
+
/* initialisation/exit functions */
int __init nl80211_init(void)
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 0c06240d25af..87479a53411b 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -714,6 +714,21 @@ static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ const void *buf, size_t len,
+ const u8 *dest, __be16 proto,
+ const bool noencrypt)
+{
+ int ret;
+ trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len,
+ dest, proto, noencrypt);
+ ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len,
+ dest, proto, noencrypt);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
static inline int
rdev_mgmt_tx_cancel_wait(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u64 cookie)
@@ -1190,4 +1205,19 @@ static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev,
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
+
+static inline int
+rdev_external_auth(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_external_auth_params *params)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_external_auth(&rdev->wiphy, dev, params);
+ if (rdev->ops->external_auth)
+ ret = rdev->ops->external_auth(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 7b42f0bacfd8..16c7e4ef5820 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,6 +5,7 @@
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -134,12 +135,12 @@ static void restore_regulatory_settings(bool reset_user);
static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
{
- return rtnl_dereference(cfg80211_regdomain);
+ return rcu_dereference_rtnl(cfg80211_regdomain);
}
const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy)
{
- return rtnl_dereference(wiphy->regd);
+ return rcu_dereference_rtnl(wiphy->regd);
}
static const char *reg_dfs_region_str(enum nl80211_dfs_regions dfs_region)
@@ -424,23 +425,36 @@ static const struct ieee80211_regdomain *
reg_copy_regd(const struct ieee80211_regdomain *src_regd)
{
struct ieee80211_regdomain *regd;
- int size_of_regd;
+ int size_of_regd, size_of_wmms;
unsigned int i;
+ struct ieee80211_wmm_rule *d_wmm, *s_wmm;
size_of_regd =
sizeof(struct ieee80211_regdomain) +
src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule);
+ size_of_wmms = src_regd->n_wmm_rules *
+ sizeof(struct ieee80211_wmm_rule);
- regd = kzalloc(size_of_regd, GFP_KERNEL);
+ regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL);
if (!regd)
return ERR_PTR(-ENOMEM);
memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain));
- for (i = 0; i < src_regd->n_reg_rules; i++)
+ d_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
+ s_wmm = (struct ieee80211_wmm_rule *)((u8 *)src_regd + size_of_regd);
+ memcpy(d_wmm, s_wmm, size_of_wmms);
+
+ for (i = 0; i < src_regd->n_reg_rules; i++) {
memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i],
sizeof(struct ieee80211_reg_rule));
+ if (!src_regd->reg_rules[i].wmm_rule)
+ continue;
+ regd->reg_rules[i].wmm_rule = d_wmm +
+ (src_regd->reg_rules[i].wmm_rule - s_wmm) /
+ sizeof(struct ieee80211_wmm_rule);
+ }
return regd;
}
@@ -595,6 +609,17 @@ enum fwdb_flags {
FWDB_FLAG_AUTO_BW = BIT(4),
};
+struct fwdb_wmm_ac {
+ u8 ecw;
+ u8 aifsn;
+ __be16 cot;
+} __packed;
+
+struct fwdb_wmm_rule {
+ struct fwdb_wmm_ac client[IEEE80211_NUM_ACS];
+ struct fwdb_wmm_ac ap[IEEE80211_NUM_ACS];
+} __packed;
+
struct fwdb_rule {
u8 len;
u8 flags;
@@ -602,6 +627,7 @@ struct fwdb_rule {
__be32 start, end, max_bw;
/* start of optional data */
__be16 cac_timeout;
+ __be16 wmm_ptr;
} __packed __aligned(4);
#define FWDB_MAGIC 0x52474442
@@ -613,6 +639,31 @@ struct fwdb_header {
struct fwdb_country country[];
} __packed __aligned(4);
+static int ecw2cw(int ecw)
+{
+ return (1 << ecw) - 1;
+}
+
+static bool valid_wmm(struct fwdb_wmm_rule *rule)
+{
+ struct fwdb_wmm_ac *ac = (struct fwdb_wmm_ac *)rule;
+ int i;
+
+ for (i = 0; i < IEEE80211_NUM_ACS * 2; i++) {
+ u16 cw_min = ecw2cw((ac[i].ecw & 0xf0) >> 4);
+ u16 cw_max = ecw2cw(ac[i].ecw & 0x0f);
+ u8 aifsn = ac[i].aifsn;
+
+ if (cw_min >= cw_max)
+ return false;
+
+ if (aifsn < 1)
+ return false;
+ }
+
+ return true;
+}
+
static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
{
struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2));
@@ -623,7 +674,18 @@ static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
/* mandatory fields */
if (rule->len < offsetofend(struct fwdb_rule, max_bw))
return false;
+ if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) {
+ u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
+ struct fwdb_wmm_rule *wmm;
+
+ if (wmm_ptr + sizeof(struct fwdb_wmm_rule) > size)
+ return false;
+ wmm = (void *)(data + wmm_ptr);
+
+ if (!valid_wmm(wmm))
+ return false;
+ }
return true;
}
@@ -798,23 +860,118 @@ static bool valid_regdb(const u8 *data, unsigned int size)
return true;
}
+static void set_wmm_rule(struct ieee80211_wmm_rule *rule,
+ struct fwdb_wmm_rule *wmm)
+{
+ unsigned int i;
+
+ for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ rule->client[i].cw_min =
+ ecw2cw((wmm->client[i].ecw & 0xf0) >> 4);
+ rule->client[i].cw_max = ecw2cw(wmm->client[i].ecw & 0x0f);
+ rule->client[i].aifsn = wmm->client[i].aifsn;
+ rule->client[i].cot = 1000 * be16_to_cpu(wmm->client[i].cot);
+ rule->ap[i].cw_min = ecw2cw((wmm->ap[i].ecw & 0xf0) >> 4);
+ rule->ap[i].cw_max = ecw2cw(wmm->ap[i].ecw & 0x0f);
+ rule->ap[i].aifsn = wmm->ap[i].aifsn;
+ rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot);
+ }
+}
+
+static int __regdb_query_wmm(const struct fwdb_header *db,
+ const struct fwdb_country *country, int freq,
+ u32 *dbptr, struct ieee80211_wmm_rule *rule)
+{
+ unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+ struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
+ int i;
+
+ for (i = 0; i < coll->n_rules; i++) {
+ __be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+ unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
+ struct fwdb_rule *rrule = (void *)((u8 *)db + rule_ptr);
+ struct fwdb_wmm_rule *wmm;
+ unsigned int wmm_ptr;
+
+ if (rrule->len < offsetofend(struct fwdb_rule, wmm_ptr))
+ continue;
+
+ if (freq >= KHZ_TO_MHZ(be32_to_cpu(rrule->start)) &&
+ freq <= KHZ_TO_MHZ(be32_to_cpu(rrule->end))) {
+ wmm_ptr = be16_to_cpu(rrule->wmm_ptr) << 2;
+ wmm = (void *)((u8 *)db + wmm_ptr);
+ set_wmm_rule(rule, wmm);
+ if (dbptr)
+ *dbptr = wmm_ptr;
+ return 0;
+ }
+ }
+
+ return -ENODATA;
+}
+
+int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
+ struct ieee80211_wmm_rule *rule)
+{
+ const struct fwdb_header *hdr = regdb;
+ const struct fwdb_country *country;
+
+ if (IS_ERR(regdb))
+ return PTR_ERR(regdb);
+
+ country = &hdr->country[0];
+ while (country->coll_ptr) {
+ if (alpha2_equal(alpha2, country->alpha2))
+ return __regdb_query_wmm(regdb, country, freq, dbptr,
+ rule);
+
+ country++;
+ }
+
+ return -ENODATA;
+}
+EXPORT_SYMBOL(reg_query_regdb_wmm);
+
+struct wmm_ptrs {
+ struct ieee80211_wmm_rule *rule;
+ u32 ptr;
+};
+
+static struct ieee80211_wmm_rule *find_wmm_ptr(struct wmm_ptrs *wmm_ptrs,
+ u32 wmm_ptr, int n_wmms)
+{
+ int i;
+
+ for (i = 0; i < n_wmms; i++) {
+ if (wmm_ptrs[i].ptr == wmm_ptr)
+ return wmm_ptrs[i].rule;
+ }
+ return NULL;
+}
+
static int regdb_query_country(const struct fwdb_header *db,
const struct fwdb_country *country)
{
unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
struct ieee80211_regdomain *regdom;
- unsigned int size_of_regd;
- unsigned int i;
+ struct ieee80211_regdomain *tmp_rd;
+ unsigned int size_of_regd, i, n_wmms = 0;
+ struct wmm_ptrs *wmm_ptrs;
- size_of_regd =
- sizeof(struct ieee80211_regdomain) +
+ size_of_regd = sizeof(struct ieee80211_regdomain) +
coll->n_rules * sizeof(struct ieee80211_reg_rule);
regdom = kzalloc(size_of_regd, GFP_KERNEL);
if (!regdom)
return -ENOMEM;
+ wmm_ptrs = kcalloc(coll->n_rules, sizeof(*wmm_ptrs), GFP_KERNEL);
+ if (!wmm_ptrs) {
+ kfree(regdom);
+ return -ENOMEM;
+ }
+
regdom->n_reg_rules = coll->n_rules;
regdom->alpha2[0] = country->alpha2[0];
regdom->alpha2[1] = country->alpha2[1];
@@ -851,7 +1008,38 @@ static int regdb_query_country(const struct fwdb_header *db,
if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
rrule->dfs_cac_ms =
1000 * be16_to_cpu(rule->cac_timeout);
+ if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) {
+ u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
+ struct ieee80211_wmm_rule *wmm_pos =
+ find_wmm_ptr(wmm_ptrs, wmm_ptr, n_wmms);
+ struct fwdb_wmm_rule *wmm;
+ struct ieee80211_wmm_rule *wmm_rule;
+
+ if (wmm_pos) {
+ rrule->wmm_rule = wmm_pos;
+ continue;
+ }
+ wmm = (void *)((u8 *)db + wmm_ptr);
+ tmp_rd = krealloc(regdom, size_of_regd + (n_wmms + 1) *
+ sizeof(struct ieee80211_wmm_rule),
+ GFP_KERNEL);
+
+ if (!tmp_rd) {
+ kfree(regdom);
+ return -ENOMEM;
+ }
+ regdom = tmp_rd;
+
+ wmm_rule = (struct ieee80211_wmm_rule *)
+ ((u8 *)regdom + size_of_regd + n_wmms *
+ sizeof(struct ieee80211_wmm_rule));
+
+ set_wmm_rule(wmm_rule, wmm);
+ wmm_ptrs[n_wmms].ptr = wmm_ptr;
+ wmm_ptrs[n_wmms++].rule = wmm_rule;
+ }
}
+ kfree(wmm_ptrs);
return reg_schedule_apply(regdom);
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 701cfd7acc1b..5df6b33db786 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1239,17 +1239,38 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
wdev_lock(wdev);
if (wdev->conn_owner_nlportid) {
- /*
- * Use disconnect_bssid if still connecting and ops->disconnect
- * not implemented. Otherwise we can use cfg80211_disconnect.
- */
- if (rdev->ops->disconnect || wdev->current_bss)
- cfg80211_disconnect(rdev, wdev->netdev,
- WLAN_REASON_DEAUTH_LEAVING, true);
- else
- cfg80211_mlme_deauth(rdev, wdev->netdev,
- wdev->disconnect_bssid, NULL, 0,
- WLAN_REASON_DEAUTH_LEAVING, false);
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_ADHOC:
+ cfg80211_leave_ibss(rdev, wdev->netdev, false);
+ break;
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ cfg80211_stop_ap(rdev, wdev->netdev, false);
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ cfg80211_leave_mesh(rdev, wdev->netdev);
+ break;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ /*
+ * Use disconnect_bssid if still connecting and
+ * ops->disconnect not implemented. Otherwise we can
+ * use cfg80211_disconnect.
+ */
+ if (rdev->ops->disconnect || wdev->current_bss)
+ cfg80211_disconnect(rdev, wdev->netdev,
+ WLAN_REASON_DEAUTH_LEAVING,
+ true);
+ else
+ cfg80211_mlme_deauth(rdev, wdev->netdev,
+ wdev->disconnect_bssid,
+ NULL, 0,
+ WLAN_REASON_DEAUTH_LEAVING,
+ false);
+ break;
+ default:
+ break;
+ }
}
wdev_unlock(wdev);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index bcfedd39e7a3..55fb279a5196 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1882,6 +1882,32 @@ TRACE_EVENT(rdev_mgmt_tx,
BOOL_TO_STR(__entry->dont_wait_for_ack))
);
+TRACE_EVENT(rdev_tx_control_port,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ const u8 *buf, size_t len, const u8 *dest, __be16 proto,
+ bool unencrypted),
+ TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(dest)
+ __field(__be16, proto)
+ __field(bool, unencrypted)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(dest, dest);
+ __entry->proto = proto;
+ __entry->unencrypted = unencrypted;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ","
+ " proto: 0x%x, unencrypted: %s",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest),
+ be16_to_cpu(__entry->proto),
+ BOOL_TO_STR(__entry->unencrypted))
+);
+
TRACE_EVENT(rdev_set_noack_map,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
u16 noack_map),
@@ -2319,6 +2345,29 @@ TRACE_EVENT(rdev_del_pmk,
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
);
+TRACE_EVENT(rdev_external_auth,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_external_auth_params *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(bssid)
+ __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1)
+ __field(u16, status)
+ ),
+ TP_fast_assign(WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(bssid, params->bssid);
+ memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1);
+ memcpy(__entry->ssid, params->ssid.ssid,
+ params->ssid.ssid_len);
+ __entry->status = params->status;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
+ ", ssid: %s, status: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->bssid, __entry->ssid, __entry->status)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
@@ -2577,6 +2626,27 @@ TRACE_EVENT(cfg80211_mgmt_tx_status,
WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack))
);
+TRACE_EVENT(cfg80211_rx_control_port,
+ TP_PROTO(struct net_device *netdev, const u8 *buf, size_t len,
+ const u8 *addr, u16 proto, bool unencrypted),
+ TP_ARGS(netdev, buf, len, addr, proto, unencrypted),
+ TP_STRUCT__entry(
+ NETDEV_ENTRY
+ MAC_ENTRY(addr)
+ __field(u16, proto)
+ __field(bool, unencrypted)
+ ),
+ TP_fast_assign(
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(addr, addr);
+ __entry->proto = proto;
+ __entry->unencrypted = unencrypted;
+ ),
+ TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT " proto: 0x%x, unencrypted: %s",
+ NETDEV_PR_ARG, MAC_PR_ARG(addr),
+ __entry->proto, BOOL_TO_STR(__entry->unencrypted))
+);
+
TRACE_EVENT(cfg80211_cqm_rssi_notify,
TP_PROTO(struct net_device *netdev,
enum nl80211_cqm_rssi_threshold_event rssi_event,
@@ -3114,7 +3184,7 @@ TRACE_EVENT(rdev_start_radar_detection,
TRACE_EVENT(rdev_set_mcast_rate,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- int mcast_rate[NUM_NL80211_BANDS]),
+ int *mcast_rate),
TP_ARGS(wiphy, netdev, mcast_rate),
TP_STRUCT__entry(
WIPHY_ENTRY
diff --git a/net/wireless/util.c b/net/wireless/util.c
index c69160694b6c..d112e9a89364 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -420,7 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype)
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct {
@@ -434,7 +435,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return -1;
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
if (skb->len < hdrlen + 8)
return -1;
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 9efbfc753347..5e677dac2a0c 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -347,13 +347,13 @@ void wireless_nlevent_flush(void)
struct sk_buff *skb;
struct net *net;
- ASSERT_RTNL();
-
+ down_read(&net_rwsem);
for_each_net(net) {
while ((skb = skb_dequeue(&net->wext_nlevents)))
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
GFP_KERNEL);
}
+ up_read(&net_rwsem);
}
EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
@@ -410,9 +410,7 @@ subsys_initcall(wireless_nlevent_init);
/* Process events generated by the wireless layer or the driver. */
static void wireless_nlevent_process(struct work_struct *work)
{
- rtnl_lock();
wireless_nlevent_flush();
- rtnl_unlock();
}
static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process);
diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c
index 5511f989ef47..b4c464594a5e 100644
--- a/net/wireless/wext-proc.c
+++ b/net/wireless/wext-proc.c
@@ -142,7 +142,7 @@ static const struct file_operations wireless_seq_fops = {
int __net_init wext_proc_init(struct net *net)
{
/* Create /proc/net/wireless entry */
- if (!proc_create("wireless", S_IRUGO, net->proc_net,
+ if (!proc_create("wireless", 0444, net->proc_net,
&wireless_seq_fops))
return -ENOMEM;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 562cc11131f6..d49aa79b7997 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -896,7 +896,7 @@ out:
}
static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr;
struct sock *sk = sock->sk;
@@ -913,7 +913,7 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
sx25->sx25_addr = x25->source_addr;
sx25->sx25_family = AF_X25;
- *uaddr_len = sizeof(*sx25);
+ rc = sizeof(*sx25);
out:
return rc;
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 0917f047f2cf..64b415e93f6a 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -212,16 +212,16 @@ int __init x25_proc_init(void)
if (!proc_mkdir("x25", init_net.proc_net))
return -ENOMEM;
- if (!proc_create("x25/route", S_IRUGO, init_net.proc_net,
- &x25_seq_route_fops))
+ if (!proc_create("x25/route", 0444, init_net.proc_net,
+ &x25_seq_route_fops))
goto out;
- if (!proc_create("x25/socket", S_IRUGO, init_net.proc_net,
- &x25_seq_socket_fops))
+ if (!proc_create("x25/socket", 0444, init_net.proc_net,
+ &x25_seq_socket_fops))
goto out;
- if (!proc_create("x25/forward", S_IRUGO, init_net.proc_net,
- &x25_seq_forward_fops))
+ if (!proc_create("x25/forward", 0444, init_net.proc_net,
+ &x25_seq_forward_fops))
goto out;
return 0;
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index db0b1315d577..9c214ec681ac 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -335,8 +335,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
}
}
- pr_debug("invalid PLP frame %02X %02X %02X\n",
- frame[0], frame[1], frame[2]);
+ pr_debug("invalid PLP frame %3ph\n", frame);
return X25_ILLEGAL;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 8e70291e586a..175941e15a6e 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -217,7 +217,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
if (skb->len <= mtu)
goto ok;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
goto ok;
}
@@ -350,7 +350,7 @@ static struct notifier_block xfrm_dev_notifier = {
.notifier_call = xfrm_dev_event,
};
-void __net_init xfrm_dev_init(void)
+void __init xfrm_dev_init(void)
{
register_netdevice_notifier(&xfrm_dev_notifier);
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1472c0857975..352abca2605f 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -9,6 +9,7 @@
*/
#include <linux/bottom_half.h>
+#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -26,12 +27,18 @@ struct xfrm_trans_tasklet {
};
struct xfrm_trans_cb {
+ union {
+ struct inet_skb_parm h4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_skb_parm h6;
+#endif
+ } header;
int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
};
#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
-static struct kmem_cache *secpath_cachep __read_mostly;
+static struct kmem_cache *secpath_cachep __ro_after_init;
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index ccfdc7115a83..a00ec715aa46 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -283,7 +283,7 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name)
struct crypto_comp *tfm;
/* This can be any valid CPU ID so we don't need locking. */
- tfm = __this_cpu_read(*pos->tfms);
+ tfm = this_cpu_read(*pos->tfms);
if (!strcmp(crypto_comp_name(tfm), alg_name)) {
pos->users++;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 23468672a767..89b178a78dc7 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -285,8 +285,9 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
return;
afinfo = xfrm_state_get_afinfo(proto);
- if (afinfo)
+ if (afinfo) {
afinfo->local_error(skb, mtu);
- rcu_read_unlock();
+ rcu_read_unlock();
+ }
}
EXPORT_SYMBOL_GPL(xfrm_local_error);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7a23078132cf..40b54cc64243 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -51,7 +51,7 @@ static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly;
-static struct kmem_cache *xfrm_dst_cache __read_mostly;
+static struct kmem_cache *xfrm_dst_cache __ro_after_init;
static __read_mostly seqcount_t xfrm_policy_hash_generation;
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
@@ -1458,10 +1458,13 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
static int xfrm_get_tos(const struct flowi *fl, int family)
{
const struct xfrm_policy_afinfo *afinfo;
- int tos = 0;
+ int tos;
afinfo = xfrm_policy_get_afinfo(family);
- tos = afinfo ? afinfo->get_tos(fl) : 0;
+ if (!afinfo)
+ return 0;
+
+ tos = afinfo->get_tos(fl);
rcu_read_unlock();
@@ -1740,7 +1743,7 @@ static void xfrm_pcpu_work_fn(struct work_struct *work)
void xfrm_policy_cache_flush(void)
{
struct xfrm_dst *old;
- bool found = 0;
+ bool found = false;
int cpu;
might_sleep();
@@ -1891,7 +1894,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
spin_unlock(&pq->hold_queue.lock);
dst_hold(xfrm_dst_path(dst));
- dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
+ dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst))
goto purge_queue;
@@ -2729,14 +2732,14 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
while (dst->xfrm) {
const struct xfrm_state *xfrm = dst->xfrm;
+ dst = xfrm_dst_child(dst);
+
if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
continue;
if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR)
daddr = xfrm->coaddr;
else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
daddr = &xfrm->id.daddr;
-
- dst = xfrm_dst_child(dst);
}
return daddr;
}
@@ -2892,8 +2895,6 @@ static int __net_init xfrm_policy_init(struct net *net)
INIT_LIST_HEAD(&net->xfrm.policy_all);
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
- if (net_eq(net, &init_net))
- xfrm_dev_init();
return 0;
out_bydst:
@@ -2996,6 +2997,7 @@ void __init xfrm_init(void)
INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
register_pernet_subsys(&xfrm_net_ops);
+ xfrm_dev_init();
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
}
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 6d5f85f4e672..ed06903cd84d 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -79,7 +79,7 @@ static const struct file_operations xfrm_statistics_seq_fops = {
int __net_init xfrm_proc_init(struct net *net)
{
- if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net,
+ if (!proc_create("xfrm_stat", 0444, net->proc_net,
&xfrm_statistics_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 1d38c6acf8af..9e3a5e85f828 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -660,7 +660,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
} else {
XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
- xo->seq.low = oseq = oseq + 1;
+ xo->seq.low = oseq + 1;
xo->seq.hi = oseq_hi;
oseq += skb_shinfo(skb)->gso_segs;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 54e21f19d722..f9d2f2233f09 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2056,6 +2056,11 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
struct xfrm_mgr *km;
struct xfrm_policy *pol = NULL;
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ return -EOPNOTSUPP;
+#endif
+
if (!optval && !optlen) {
xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7f52b8eb177d..080035f056d9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -121,22 +121,17 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
struct xfrm_replay_state_esn *rs;
- if (p->flags & XFRM_STATE_ESN) {
- if (!rt)
- return -EINVAL;
+ if (!rt)
+ return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0;
- rs = nla_data(rt);
+ rs = nla_data(rt);
- if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
- return -EINVAL;
-
- if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
- nla_len(rt) != sizeof(*rs))
- return -EINVAL;
- }
+ if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
+ return -EINVAL;
- if (!rt)
- return 0;
+ if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
+ nla_len(rt) != sizeof(*rs))
+ return -EINVAL;
/* As only ESP and AH support ESN feature. */
if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))