summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/hippi.c2
-rw-r--r--net/802/p8022.c2
-rw-r--r--net/802/psnap.c2
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan_dev.c9
-rw-r--r--net/9p/client.c436
-rw-r--r--net/9p/error.c4
-rw-r--r--net/9p/mod.c41
-rw-r--r--net/9p/protocol.c38
-rw-r--r--net/9p/protocol.h4
-rw-r--r--net/9p/trans_common.c10
-rw-r--r--net/9p/trans_common.h12
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/9p/trans_rdma.c3
-rw-r--r--net/9p/trans_virtio.c1
-rw-r--r--net/9p/trans_xen.c26
-rw-r--r--net/Kconfig2
-rw-r--r--net/atm/br2684.c6
-rw-r--r--net/atm/lec.c8
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/ax25/ax25_dev.c2
-rw-r--r--net/ax25/ax25_iface.c6
-rw-r--r--net/ax25/ax25_in.c4
-rw-r--r--net/ax25/ax25_out.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c22
-rw-r--r--net/batman-adv/main.c56
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/network-coding.c4
-rw-r--r--net/batman-adv/routing.c3
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/tp_meter.c4
-rw-r--r--net/batman-adv/translation-table.c4
-rw-r--r--net/batman-adv/tvlv.c4
-rw-r--r--net/batman-adv/tvlv.h4
-rw-r--r--net/bluetooth/6lowpan.c4
-rw-r--r--net/bluetooth/Makefile3
-rw-r--r--net/bluetooth/bnep/core.c2
-rw-r--r--net/bluetooth/eir.c335
-rw-r--r--net/bluetooth/eir.h72
-rw-r--r--net/bluetooth/hci_codec.c238
-rw-r--r--net/bluetooth/hci_codec.h7
-rw-r--r--net/bluetooth/hci_conn.c168
-rw-r--r--net/bluetooth/hci_core.c320
-rw-r--r--net/bluetooth/hci_debugfs.c123
-rw-r--r--net/bluetooth/hci_debugfs.h5
-rw-r--r--net/bluetooth/hci_event.c135
-rw-r--r--net/bluetooth/hci_request.c478
-rw-r--r--net/bluetooth/hci_request.h25
-rw-r--r--net/bluetooth/hci_sock.c214
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bluetooth/l2cap_sock.c10
-rw-r--r--net/bluetooth/mgmt.c445
-rw-r--r--net/bluetooth/msft.c172
-rw-r--r--net/bluetooth/msft.h9
-rw-r--r--net/bluetooth/rfcomm/core.c50
-rw-r--r--net/bluetooth/rfcomm/sock.c46
-rw-r--r--net/bluetooth/sco.c209
-rw-r--r--net/bpf/Makefile3
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c200
-rw-r--r--net/bpf/test_run.c64
-rw-r--r--net/bridge/br.c4
-rw-r--r--net/bridge/br_fdb.c439
-rw-r--r--net/bridge/br_if.c4
-rw-r--r--net/bridge/br_ioctl.c10
-rw-r--r--net/bridge/br_mdb.c242
-rw-r--r--net/bridge/br_multicast.c6
-rw-r--r--net/bridge/br_netfilter_hooks.c2
-rw-r--r--net/bridge/br_netlink.c7
-rw-r--r--net/bridge/br_private.h47
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/bridge/br_switchdev.c438
-rw-r--r--net/bridge/br_vlan.c89
-rw-r--r--net/bridge/netfilter/ebtable_broute.c2
-rw-r--r--net/bridge/netfilter/ebtable_filter.c13
-rw-r--r--net/bridge/netfilter/ebtable_nat.c12
-rw-r--r--net/bridge/netfilter/ebtables.c17
-rw-r--r--net/caif/caif_usb.c2
-rw-r--r--net/caif/chnl_net.c19
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/isotp.c51
-rw-r--r--net/can/j1939/j1939-priv.h1
-rw-r--r--net/can/j1939/main.c14
-rw-r--r--net/can/j1939/transport.c25
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c3
-rw-r--r--net/core/dev.c120
-rw-r--r--net/core/dev_addr_lists.c6
-rw-r--r--net/core/dev_ioctl.c2
-rw-r--r--net/core/devlink.c825
-rw-r--r--net/core/filter.c108
-rw-r--r--net/core/flow_dissector.c18
-rw-r--r--net/core/gen_estimator.c52
-rw-r--r--net/core/gen_stats.c186
-rw-r--r--net/core/neighbour.c204
-rw-r--r--net/core/net-procfs.c24
-rw-r--r--net/core/net-sysfs.c61
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/netclassid_cgroup.c7
-rw-r--r--net/core/netprio_cgroup.c10
-rw-r--r--net/core/of_net.c170
-rw-r--r--net/core/page_pool.c10
-rw-r--r--net/core/rtnetlink.c15
-rw-r--r--net/core/selftests.c8
-rw-r--r--net/core/skbuff.c99
-rw-r--r--net/core/skmsg.c57
-rw-r--r--net/core/sock.c187
-rw-r--r--net/core/sock_destructor.h12
-rw-r--r--net/core/sock_map.c6
-rw-r--r--net/core/stream.c5
-rw-r--r--net/core/sysctl_net_core.c2
-rw-r--r--net/core/xdp.c2
-rw-r--r--net/dccp/dccp.h2
-rw-r--r--net/dccp/minisocks.c2
-rw-r--r--net/dccp/proto.c14
-rw-r--r--net/dsa/Kconfig25
-rw-r--r--net/dsa/Makefile3
-rw-r--r--net/dsa/dsa.c27
-rw-r--r--net/dsa/dsa2.c236
-rw-r--r--net/dsa/dsa_priv.h1
-rw-r--r--net/dsa/port.c27
-rw-r--r--net/dsa/slave.c102
-rw-r--r--net/dsa/switch.c251
-rw-r--r--net/dsa/tag_8021q.c114
-rw-r--r--net/dsa/tag_dsa.c30
-rw-r--r--net/dsa/tag_ksz.c1
-rw-r--r--net/dsa/tag_ocelot.c45
-rw-r--r--net/dsa/tag_ocelot_8021q.c44
-rw-r--r--net/dsa/tag_rtl4_a.c2
-rw-r--r--net/dsa/tag_rtl8_4.c178
-rw-r--r--net/dsa/tag_sja1105.c52
-rw-r--r--net/ethernet/eth.c102
-rw-r--r--net/ethtool/Makefile2
-rw-r--r--net/ethtool/ioctl.c171
-rw-r--r--net/ethtool/module.c180
-rw-r--r--net/ethtool/netlink.c19
-rw-r--r--net/ethtool/netlink.h4
-rw-r--r--net/ethtool/pause.c3
-rw-r--r--net/hsr/hsr_device.c10
-rw-r--r--net/hsr/hsr_forward.c54
-rw-r--r--net/hsr/hsr_framereg.c65
-rw-r--r--net/hsr/hsr_framereg.h4
-rw-r--r--net/hsr/hsr_main.c2
-rw-r--r--net/hsr/hsr_main.h16
-rw-r--r--net/ieee802154/6lowpan/core.c2
-rw-r--r--net/ipv4/af_inet.c30
-rw-r--r--net/ipv4/arp.c11
-rw-r--r--net/ipv4/bpf_tcp_ca.c45
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/datagram.c1
-rw-r--r--net/ipv4/devinet.c4
-rw-r--r--net/ipv4/fib_notifier.c1
-rw-r--r--net/ipv4/fib_semantics.c16
-rw-r--r--net/ipv4/icmp.c23
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_sockglue.c11
-rw-r--r--net/ipv4/ip_tunnel.c2
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipconfig.c12
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c7
-rw-r--r--net/ipv4/netfilter/arptable_filter.c10
-rw-r--r--net/ipv4/netfilter/ip_tables.c7
-rw-r--r--net/ipv4/netfilter/iptable_filter.c9
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c8
-rw-r--r--net/ipv4/netfilter/iptable_nat.c15
-rw-r--r--net/ipv4/netfilter/iptable_raw.c12
-rw-r--r--net/ipv4/netfilter/iptable_security.c9
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c30
-rw-r--r--net/ipv4/nexthop.c21
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/route.c8
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c21
-rw-r--r--net/ipv4/tcp.c160
-rw-r--r--net/ipv4/tcp_bbr.c28
-rw-r--r--net/ipv4/tcp_bpf.c75
-rw-r--r--net/ipv4/tcp_cubic.c26
-rw-r--r--net/ipv4/tcp_dctcp.c26
-rw-r--r--net/ipv4/tcp_fastopen.c6
-rw-r--r--net/ipv4/tcp_input.c39
-rw-r--r--net/ipv4/tcp_ipv4.c76
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_nv.c1
-rw-r--r--net/ipv4/tcp_output.c66
-rw-r--r--net/ipv4/tcp_rate.c6
-rw-r--r--net/ipv4/udp.c17
-rw-r--r--net/ipv4/udp_bpf.c1
-rw-r--r--net/ipv4/udp_tunnel_core.c3
-rw-r--r--net/ipv4/udp_tunnel_nic.c2
-rw-r--r--net/ipv4/xfrm4_tunnel.c2
-rw-r--r--net/ipv6/Kconfig6
-rw-r--r--net/ipv6/Makefile11
-rw-r--r--net/ipv6/addrconf.c19
-rw-r--r--net/ipv6/af_inet6.c21
-rw-r--r--net/ipv6/exthdrs.c2
-rw-r--r--net/ipv6/ila/ila_xlat.c6
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ioam6.c81
-rw-r--r--net/ipv6/ioam6_iptunnel.c306
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/ip6_gre.c4
-rw-r--r--net/ipv6/ip6_output.c3
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c11
-rw-r--r--net/ipv6/ndisc.c16
-rw-r--r--net/ipv6/netfilter/ip6_tables.c7
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c48
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c10
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c8
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c15
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c10
-rw-r--r--net/ipv6/netfilter/ip6table_security.c9
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c25
-rw-r--r--net/ipv6/route.c29
-rw-r--r--net/ipv6/seg6.c8
-rw-r--r--net/ipv6/seg6_hmac.c4
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/tcp_ipv6.c58
-rw-r--r--net/ipv6/udp.c17
-rw-r--r--net/l2tp/l2tp_core.c4
-rw-r--r--net/llc/llc_c_ac.c2
-rw-r--r--net/llc/llc_if.c2
-rw-r--r--net/llc/llc_output.c2
-rw-r--r--net/llc/llc_proc.c2
-rw-r--r--net/mac80211/agg-rx.c14
-rw-r--r--net/mac80211/cfg.c38
-rw-r--r--net/mac80211/debugfs_sta.c123
-rw-r--r--net/mac80211/fils_aead.c22
-rw-r--r--net/mac80211/ibss.c33
-rw-r--r--net/mac80211/ieee80211_i.h35
-rw-r--r--net/mac80211/iface.c39
-rw-r--r--net/mac80211/mesh.c96
-rw-r--r--net/mac80211/mesh_hwmp.c44
-rw-r--r--net/mac80211/mesh_pathtbl.c5
-rw-r--r--net/mac80211/mesh_plink.c11
-rw-r--r--net/mac80211/mesh_ps.c3
-rw-r--r--net/mac80211/mesh_sync.c26
-rw-r--r--net/mac80211/mlme.c355
-rw-r--r--net/mac80211/pm.c4
-rw-r--r--net/mac80211/rate.c4
-rw-r--r--net/mac80211/rx.c15
-rw-r--r--net/mac80211/s1g.c8
-rw-r--r--net/mac80211/scan.c16
-rw-r--r--net/mac80211/sta_info.c3
-rw-r--r--net/mac80211/tdls.c63
-rw-r--r--net/mac80211/tx.c218
-rw-r--r--net/mac80211/util.c40
-rw-r--r--net/mac80211/wpa.c6
-rw-r--r--net/mac802154/iface.c17
-rw-r--r--net/mctp/Kconfig12
-rw-r--r--net/mctp/Makefile3
-rw-r--r--net/mctp/af_mctp.c174
-rw-r--r--net/mctp/device.c104
-rw-r--r--net/mctp/neigh.c4
-rw-r--r--net/mctp/route.c364
-rw-r--r--net/mctp/test/route-test.c544
-rw-r--r--net/mctp/test/utils.c67
-rw-r--r--net/mctp/test/utils.h20
-rw-r--r--net/mptcp/mib.c17
-rw-r--r--net/mptcp/mptcp_diag.c28
-rw-r--r--net/mptcp/options.c54
-rw-r--r--net/mptcp/pm_netlink.c13
-rw-r--r--net/mptcp/protocol.c504
-rw-r--r--net/mptcp/protocol.h21
-rw-r--r--net/mptcp/sockopt.c279
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/mptcp/syncookies.c13
-rw-r--r--net/mptcp/token.c11
-rw-r--r--net/mptcp/token_test.c14
-rw-r--r--net/netfilter/Kconfig13
-rw-r--r--net/netfilter/core.c38
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h4
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c166
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c5
-rw-r--r--net/netfilter/nf_conntrack_core.c154
-rw-r--r--net/netfilter/nf_conntrack_proto.c16
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c7
-rw-r--r--net/netfilter/nf_nat_core.c29
-rw-r--r--net/netfilter/nf_nat_masquerade.c168
-rw-r--r--net/netfilter/nf_tables_api.c121
-rw-r--r--net/netfilter/nf_tables_core.c2
-rw-r--r--net/netfilter/nf_tables_trace.c4
-rw-r--r--net/netfilter/nfnetlink_hook.c16
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_chain_filter.c13
-rw-r--r--net/netfilter/nft_compat.c17
-rw-r--r--net/netfilter/nft_dynset.c11
-rw-r--r--net/netfilter/nft_meta.c8
-rw-r--r--net/netfilter/nft_payload.c60
-rw-r--r--net/netfilter/nft_quota.c2
-rw-r--r--net/netfilter/xt_IDLETIMER.c2
-rw-r--r--net/netfilter/xt_LOG.c10
-rw-r--r--net/netfilter/xt_NFLOG.c10
-rw-r--r--net/netfilter/xt_RATEEST.c7
-rw-r--r--net/netlink/af_netlink.c37
-rw-r--r--net/netrom/af_netrom.c4
-rw-r--r--net/netrom/nr_dev.c8
-rw-r--r--net/netrom/nr_route.c4
-rw-r--r--net/nfc/af_nfc.c3
-rw-r--r--net/nfc/digital_core.c9
-rw-r--r--net/nfc/digital_technology.c8
-rw-r--r--net/nfc/hci/command.c16
-rw-r--r--net/nfc/hci/llc_shdlc.c35
-rw-r--r--net/nfc/llcp_commands.c8
-rw-r--r--net/nfc/llcp_core.c5
-rw-r--r--net/nfc/nci/core.c4
-rw-r--r--net/nfc/nci/hci.c4
-rw-r--r--net/nfc/nci/ntf.c9
-rw-r--r--net/nfc/nci/rsp.c2
-rw-r--r--net/nfc/nci/uart.c18
-rw-r--r--net/nfc/netlink.c15
-rw-r--r--net/openvswitch/meter.c1
-rw-r--r--net/packet/af_packet.c37
-rw-r--r--net/qrtr/Makefile3
-rw-r--r--net/qrtr/af_qrtr.c (renamed from net/qrtr/qrtr.c)0
-rw-r--r--net/rose/af_rose.c5
-rw-r--r--net/rose/rose_dev.c8
-rw-r--r--net/rose/rose_link.c8
-rw-r--r--net/rose/rose_route.c10
-rw-r--r--net/rxrpc/rtt.c2
-rw-r--r--net/sched/act_api.c21
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_ct.c2
-rw-r--r--net/sched/act_ife.c4
-rw-r--r--net/sched/act_mpls.c2
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c3
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/cls_flower.c9
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_api.c31
-rw-r--r--net/sched/sch_atm.c6
-rw-r--r--net/sched/sch_cbq.c15
-rw-r--r--net/sched/sch_drr.c13
-rw-r--r--net/sched/sch_ets.c17
-rw-r--r--net/sched/sch_fifo.c3
-rw-r--r--net/sched/sch_fq_codel.c20
-rw-r--r--net/sched/sch_generic.c84
-rw-r--r--net/sched/sch_gred.c65
-rw-r--r--net/sched/sch_hfsc.c11
-rw-r--r--net/sched/sch_htb.c51
-rw-r--r--net/sched/sch_mq.c31
-rw-r--r--net/sched/sch_mqprio.c58
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_netem.c2
-rw-r--r--net/sched/sch_prio.c4
-rw-r--r--net/sched/sch_qfq.c13
-rw-r--r--net/sched/sch_taprio.c33
-rw-r--r--net/sched/sch_tbf.c16
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/output.c13
-rw-r--r--net/sctp/protocol.c1
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/sm_statefuns.c173
-rw-r--r--net/sctp/socket.c5
-rw-r--r--net/sctp/transport.c11
-rw-r--r--net/smc/Makefile2
-rw-r--r--net/smc/af_smc.c469
-rw-r--r--net/smc/smc.h23
-rw-r--r--net/smc/smc_cdc.c7
-rw-r--r--net/smc/smc_clc.c466
-rw-r--r--net/smc/smc_clc.h72
-rw-r--r--net/smc/smc_core.c214
-rw-r--r--net/smc/smc_core.h51
-rw-r--r--net/smc/smc_ib.c160
-rw-r--r--net/smc/smc_ib.h16
-rw-r--r--net/smc/smc_ism.c16
-rw-r--r--net/smc/smc_ism.h2
-rw-r--r--net/smc/smc_llc.c686
-rw-r--r--net/smc/smc_llc.h12
-rw-r--r--net/smc/smc_netlink.c47
-rw-r--r--net/smc/smc_netlink.h2
-rw-r--r--net/smc/smc_pnet.c41
-rw-r--r--net/smc/smc_rx.c3
-rw-r--r--net/smc/smc_tracepoint.c9
-rw-r--r--net/smc/smc_tracepoint.h116
-rw-r--r--net/smc/smc_tx.c25
-rw-r--r--net/smc/smc_wr.c237
-rw-r--r--net/smc/smc_wr.h22
-rw-r--r--net/strparser/strparser.c10
-rw-r--r--net/sunrpc/addr.c40
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/clnt.c33
-rw-r--r--net/sunrpc/sched.c20
-rw-r--r--net/sunrpc/svc.c80
-rw-r--r--net/sunrpc/svc_xprt.c1
-rw-r--r--net/sunrpc/sysfs.c12
-rw-r--r--net/sunrpc/xdr.c32
-rw-r--r--net/sunrpc/xprt.c41
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c48
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c23
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c9
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c30
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c14
-rw-r--r--net/sunrpc/xprtrdma/verbs.c3
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h6
-rw-r--r--net/sunrpc/xprtsock.c109
-rw-r--r--net/switchdev/switchdev.c156
-rw-r--r--net/sysctl_net.c2
-rw-r--r--net/tipc/bearer.c4
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/crypto.c32
-rw-r--r--net/tipc/eth_media.c2
-rw-r--r--net/tipc/ib_media.c2
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tls/tls_main.c92
-rw-r--r--net/tls/tls_sw.c75
-rw-r--r--net/unix/af_unix.c100
-rw-r--r--net/unix/unix_bpf.c2
-rw-r--r--net/vmw_vsock/af_vsock.c82
-rw-r--r--net/wireless/Makefile4
-rw-r--r--net/wireless/core.c12
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/mlme.c26
-rw-r--r--net/wireless/nl80211.c452
-rw-r--r--net/wireless/rdev-ops.h14
-rw-r--r--net/wireless/scan.c66
-rw-r--r--net/wireless/trace.h31
-rw-r--r--net/wireless/util.c16
-rw-r--r--net/xdp/xsk.c15
-rw-r--r--net/xdp/xsk_buff_pool.c132
-rw-r--r--net/xdp/xsk_queue.h12
-rw-r--r--net/xfrm/xfrm_input.c4
-rw-r--r--net/xfrm/xfrm_policy.c4
-rw-r--r--net/xfrm/xfrm_user.c69
434 files changed, 14146 insertions, 7023 deletions
diff --git a/net/802/hippi.c b/net/802/hippi.c
index f80b33a8f7e0..887e73d520e4 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -121,7 +121,7 @@ int hippi_mac_addr(struct net_device *dev, void *p)
struct sockaddr *addr = p;
if (netif_running(dev))
return -EBUSY;
- memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ dev_addr_set(dev, addr->sa_data);
return 0;
}
EXPORT_SYMBOL(hippi_mac_addr);
diff --git a/net/802/p8022.c b/net/802/p8022.c
index a6585627051d..79c23173116c 100644
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -23,7 +23,7 @@
#include <net/p8022.h>
static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb,
- unsigned char *dest)
+ const unsigned char *dest)
{
llc_build_and_send_ui_pkt(dl->sap, skb, dest, dl->sap->laddr.lsap);
return 0;
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 4492e8d7ad20..1406bfdbda13 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -79,7 +79,7 @@ drop:
* Put a SNAP header on a frame and pass to 802.2
*/
static int snap_request(struct datalink_proto *dl,
- struct sk_buff *skb, u8 *dest)
+ struct sk_buff *skb, const u8 *dest)
{
memcpy(skb_push(skb, 5), dl->type, 5);
llc_build_and_send_ui_pkt(snap_sap, skb, dest, snap_sap->laddr.lsap);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 55275ef9a31a..a3a0a5e994f5 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -123,9 +123,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
}
vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
-
- /* Get rid of the vlan's reference to real_dev */
- dev_put(real_dev);
}
int vlan_check_real_dev(struct net_device *real_dev,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 0c21d1fec852..ab6dee28536d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -250,7 +250,7 @@ bool vlan_dev_inherit_address(struct net_device *dev,
if (dev->addr_assign_type != NET_ADDR_STOLEN)
return false;
- ether_addr_copy(dev->dev_addr, real_dev->dev_addr);
+ eth_hw_addr_set(dev, real_dev->dev_addr);
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return true;
}
@@ -349,7 +349,7 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
dev_uc_del(real_dev, dev->dev_addr);
out:
- ether_addr_copy(dev->dev_addr, addr->sa_data);
+ eth_hw_addr_set(dev, addr->sa_data);
return 0;
}
@@ -586,7 +586,7 @@ static int vlan_dev_init(struct net_device *dev)
dev->dev_id = real_dev->dev_id;
if (is_zero_ether_addr(dev->dev_addr)) {
- ether_addr_copy(dev->dev_addr, real_dev->dev_addr);
+ eth_hw_addr_set(dev, real_dev->dev_addr);
dev->addr_assign_type = NET_ADDR_STOLEN;
}
if (is_zero_ether_addr(dev->broadcast))
@@ -843,6 +843,9 @@ static void vlan_dev_free(struct net_device *dev)
free_percpu(vlan->vlan_pcpu_stats);
vlan->vlan_pcpu_stats = NULL;
+
+ /* Get rid of the vlan's reference to real_dev */
+ dev_put(vlan->real_dev);
}
void vlan_setup(struct net_device *dev)
diff --git a/net/9p/client.c b/net/9p/client.c
index 213f12ed76cd..d062f1e5bfb0 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * net/9p/clnt.c
- *
* 9P Client
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
@@ -32,10 +30,9 @@
#define DEFAULT_MSIZE (128 * 1024)
-/*
- * Client Option Parsing (code inspired by NFS code)
- * - a little lazy - parse all client options
- */
+/* Client Option Parsing (code inspired by NFS code)
+ * - a little lazy - parse all client options
+ */
enum {
Opt_msize,
@@ -89,20 +86,18 @@ int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
}
EXPORT_SYMBOL(p9_show_client_options);
-/*
- * Some error codes are taken directly from the server replies,
+/* Some error codes are taken directly from the server replies,
* make sure they are valid.
*/
static int safe_errno(int err)
{
- if ((err > 0) || (err < -MAX_ERRNO)) {
+ if (err > 0 || err < -MAX_ERRNO) {
p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err);
return -EPROTO;
}
return err;
}
-
/* Interpret mount option for protocol version */
static int get_protocol_version(char *s)
{
@@ -117,8 +112,9 @@ static int get_protocol_version(char *s)
} else if (!strcmp(s, "9p2000.L")) {
version = p9_proto_2000L;
p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n");
- } else
+ } else {
pr_info("Unknown protocol version %s\n", s);
+ }
return version;
}
@@ -147,15 +143,13 @@ static int parse_opts(char *opts, struct p9_client *clnt)
return 0;
tmp_options = kstrdup(opts, GFP_KERNEL);
- if (!tmp_options) {
- p9_debug(P9_DEBUG_ERROR,
- "failed to allocate copy of option string\n");
+ if (!tmp_options)
return -ENOMEM;
- }
options = tmp_options;
while ((p = strsep(&options, ",")) != NULL) {
int token, r;
+
if (!*p)
continue;
token = match_token(p, tokens, args);
@@ -187,7 +181,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
v9fs_put_trans(clnt->trans_mod);
clnt->trans_mod = v9fs_get_trans_by_name(s);
- if (clnt->trans_mod == NULL) {
+ if (!clnt->trans_mod) {
pr_info("Could not find request transport: %s\n",
s);
ret = -EINVAL;
@@ -379,6 +373,7 @@ static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
static void p9_req_free(struct kref *ref)
{
struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
+
p9_fcall_fini(&r->tc);
p9_fcall_fini(&r->rc);
kmem_cache_free(p9_req_cache, r);
@@ -423,8 +418,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
{
p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
- /*
- * This barrier is needed to make sure any change made to req before
+ /* This barrier is needed to make sure any change made to req before
* the status change is visible to another thread
*/
smp_wmb();
@@ -446,12 +440,12 @@ EXPORT_SYMBOL(p9_client_cb);
*/
int
-p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag,
- int rewind)
+p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type,
+ int16_t *tag, int rewind)
{
- int8_t r_type;
- int16_t r_tag;
- int32_t r_size;
+ s8 r_type;
+ s16 r_tag;
+ s32 r_size;
int offset = pdu->offset;
int err;
@@ -499,7 +493,7 @@ EXPORT_SYMBOL(p9_parse_header);
static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
{
- int8_t type;
+ s8 type;
int err;
int ecode;
@@ -510,8 +504,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
req->rc.size);
return -EIO;
}
- /*
- * dump the response from server
+ /* dump the response from server
* This should be after check errors which poplulate pdu_fcall.
*/
trace_9p_protocol_dump(c, &req->rc);
@@ -524,6 +517,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
if (!p9_is_proto_dotl(c)) {
char *ename;
+
err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
if (err)
@@ -541,6 +535,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
kfree(ename);
} else {
err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
+ if (err)
+ goto out_err;
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -572,12 +568,11 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
{
int err;
int ecode;
- int8_t type;
+ s8 type;
char *ename = NULL;
err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
- /*
- * dump the response from server
+ /* dump the response from server
* This should be after parse_header which poplulate pdu_fcall.
*/
trace_9p_protocol_dump(c, &req->rc);
@@ -605,7 +600,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
if (len > inline_len) {
/* We have error in external buffer */
if (!copy_from_iter_full(ename + inline_len,
- len - inline_len, uidata)) {
+ len - inline_len, uidata)) {
err = -EFAULT;
goto out_err;
}
@@ -657,7 +652,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
{
struct p9_req_t *req;
- int16_t oldtag;
+ s16 oldtag;
int err;
err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
@@ -670,8 +665,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
if (IS_ERR(req))
return PTR_ERR(req);
- /*
- * if we haven't received a response for oldreq,
+ /* if we haven't received a response for oldreq,
* remove it from the list
*/
if (oldreq->status == REQ_STATUS_SENT) {
@@ -697,7 +691,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
return ERR_PTR(-EIO);
/* if status is begin_disconnected we allow only clunk request */
- if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))
+ if (c->status == BeginDisconnect && type != P9_TCLUNK)
return ERR_PTR(-EIO);
req = p9_tag_alloc(c, type, req_size);
@@ -745,8 +739,9 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
if (signal_pending(current)) {
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
- } else
+ } else {
sigpending = 0;
+ }
err = c->trans_mod->request(c, req);
if (err < 0) {
@@ -760,14 +755,13 @@ again:
/* Wait for the response */
err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
- /*
- * Make sure our req is coherent with regard to updates in other
+ /* Make sure our req is coherent with regard to updates in other
* threads - echoes to wmb() in the callback
*/
smp_rmb();
- if ((err == -ERESTARTSYS) && (c->status == Connected)
- && (type == P9_TFLUSH)) {
+ if (err == -ERESTARTSYS && c->status == Connected &&
+ type == P9_TFLUSH) {
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
goto again;
@@ -777,7 +771,7 @@ again:
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
err = req->t_err;
}
- if ((err == -ERESTARTSYS) && (c->status == Connected)) {
+ if (err == -ERESTARTSYS && c->status == Connected) {
p9_debug(P9_DEBUG_MUX, "flushing\n");
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
@@ -832,8 +826,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
struct p9_req_t *req;
va_start(ap, fmt);
- /*
- * We allocate a inline protocol data of only 4k bytes.
+ /* We allocate a inline protocol data of only 4k bytes.
* The actual content is passed in zero-copy fashion.
*/
req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
@@ -844,8 +837,9 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
if (signal_pending(current)) {
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
- } else
+ } else {
sigpending = 0;
+ }
err = c->trans_mod->zc_request(c, req, uidata, uodata,
inlen, olen, in_hdrlen);
@@ -859,7 +853,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
err = req->t_err;
}
- if ((err == -ERESTARTSYS) && (c->status == Connected)) {
+ if (err == -ERESTARTSYS && c->status == Connected) {
p9_debug(P9_DEBUG_MUX, "flushing\n");
sigpending = 1;
clear_thread_flag(TIF_SIGPENDING);
@@ -895,11 +889,11 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
struct p9_fid *fid;
p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
- fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
+ fid = kmalloc(sizeof(*fid), GFP_KERNEL);
if (!fid)
return NULL;
- memset(&fid->qid, 0, sizeof(struct p9_qid));
+ memset(&fid->qid, 0, sizeof(fid->qid));
fid->mode = -1;
fid->uid = current_fsuid();
fid->clnt = clnt;
@@ -947,15 +941,15 @@ static int p9_client_version(struct p9_client *c)
switch (c->proto_version) {
case p9_proto_2000L:
req = p9_client_rpc(c, P9_TVERSION, "ds",
- c->msize, "9P2000.L");
+ c->msize, "9P2000.L");
break;
case p9_proto_2000u:
req = p9_client_rpc(c, P9_TVERSION, "ds",
- c->msize, "9P2000.u");
+ c->msize, "9P2000.u");
break;
case p9_proto_legacy:
req = p9_client_rpc(c, P9_TVERSION, "ds",
- c->msize, "9P2000");
+ c->msize, "9P2000");
break;
default:
return -EINVAL;
@@ -972,13 +966,13 @@ static int p9_client_version(struct p9_client *c)
}
p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
- if (!strncmp(version, "9P2000.L", 8))
+ if (!strncmp(version, "9P2000.L", 8)) {
c->proto_version = p9_proto_2000L;
- else if (!strncmp(version, "9P2000.u", 8))
+ } else if (!strncmp(version, "9P2000.u", 8)) {
c->proto_version = p9_proto_2000u;
- else if (!strncmp(version, "9P2000", 6))
+ } else if (!strncmp(version, "9P2000", 6)) {
c->proto_version = p9_proto_legacy;
- else {
+ } else {
p9_debug(P9_DEBUG_ERROR,
"server returned an unknown version: %s\n", version);
err = -EREMOTEIO;
@@ -1008,7 +1002,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
char *client_id;
err = 0;
- clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
+ clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
return ERR_PTR(-ENOMEM);
@@ -1030,7 +1024,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (!clnt->trans_mod)
clnt->trans_mod = v9fs_get_default_trans();
- if (clnt->trans_mod == NULL) {
+ if (!clnt->trans_mod) {
err = -EPROTONOSUPPORT;
p9_debug(P9_DEBUG_ERROR,
"No transport defined or default transport\n");
@@ -1118,14 +1112,14 @@ void p9_client_begin_disconnect(struct p9_client *clnt)
EXPORT_SYMBOL(p9_client_begin_disconnect);
struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
- const char *uname, kuid_t n_uname, const char *aname)
+ const char *uname, kuid_t n_uname,
+ const char *aname)
{
int err = 0;
struct p9_req_t *req;
struct p9_fid *fid;
struct p9_qid qid;
-
p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
afid ? afid->fid : -1, uname, aname);
fid = p9_fid_create(clnt);
@@ -1136,7 +1130,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
fid->uid = n_uname;
req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid,
- afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
+ afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1150,7 +1144,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
}
p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
- qid.type, (unsigned long long)qid.path, qid.version);
+ qid.type, qid.path, qid.version);
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
@@ -1165,14 +1159,14 @@ error:
EXPORT_SYMBOL(p9_client_attach);
struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
- const unsigned char * const *wnames, int clone)
+ const unsigned char * const *wnames, int clone)
{
int err;
struct p9_client *clnt;
struct p9_fid *fid;
struct p9_qid *wqids;
struct p9_req_t *req;
- uint16_t nwqids, count;
+ u16 nwqids, count;
err = 0;
wqids = NULL;
@@ -1185,14 +1179,14 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
}
fid->uid = oldfid->uid;
- } else
+ } else {
fid = oldfid;
-
+ }
p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
- nwname, wnames);
+ nwname, wnames);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1215,9 +1209,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
for (count = 0; count < nwqids; count++)
p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n",
- count, wqids[count].type,
- (unsigned long long)wqids[count].path,
- wqids[count].version);
+ count, wqids[count].type,
+ wqids[count].path,
+ wqids[count].version);
if (nwname)
memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
@@ -1233,7 +1227,7 @@ clunk_fid:
fid = NULL;
error:
- if (fid && (fid != oldfid))
+ if (fid && fid != oldfid)
p9_fid_destroy(fid);
return ERR_PTR(err);
@@ -1250,7 +1244,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
- p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
+ p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
err = 0;
if (fid->mode != -1)
@@ -1272,8 +1266,8 @@ int p9_client_open(struct p9_fid *fid, int mode)
}
p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n",
- p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type,
- (unsigned long long)qid.path, qid.version, iounit);
+ p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type,
+ qid.path, qid.version, iounit);
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
fid->mode = mode;
@@ -1286,8 +1280,8 @@ error:
}
EXPORT_SYMBOL(p9_client_open);
-int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode,
- kgid_t gid, struct p9_qid *qid)
+int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
+ u32 mode, kgid_t gid, struct p9_qid *qid)
{
int err = 0;
struct p9_client *clnt;
@@ -1295,16 +1289,16 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
int iounit;
p9_debug(P9_DEBUG_9P,
- ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
- ofid->fid, name, flags, mode,
- from_kgid(&init_user_ns, gid));
+ ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
+ ofid->fid, name, flags, mode,
+ from_kgid(&init_user_ns, gid));
clnt = ofid->clnt;
if (ofid->mode != -1)
return -EINVAL;
req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags,
- mode, gid);
+ mode, gid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1317,9 +1311,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
}
p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n",
- qid->type,
- (unsigned long long)qid->path,
- qid->version, iounit);
+ qid->type, qid->path, qid->version, iounit);
memmove(&ofid->qid, qid, sizeof(struct p9_qid));
ofid->mode = mode;
@@ -1342,7 +1334,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
int iounit;
p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
- fid->fid, name, perm, mode);
+ fid->fid, name, perm, mode);
err = 0;
clnt = fid->clnt;
@@ -1350,7 +1342,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
return -EINVAL;
req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm,
- mode, extension);
+ mode, extension);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1363,9 +1355,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
}
p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
- qid.type,
- (unsigned long long)qid.path,
- qid.version, iounit);
+ qid.type, qid.path, qid.version, iounit);
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
fid->mode = mode;
@@ -1379,18 +1369,18 @@ error:
EXPORT_SYMBOL(p9_client_fcreate);
int p9_client_symlink(struct p9_fid *dfid, const char *name,
- const char *symtgt, kgid_t gid, struct p9_qid *qid)
+ const char *symtgt, kgid_t gid, struct p9_qid *qid)
{
int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n",
- dfid->fid, name, symtgt);
+ dfid->fid, name, symtgt);
clnt = dfid->clnt;
req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt,
- gid);
+ gid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1403,7 +1393,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
}
p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n",
- qid->type, (unsigned long long)qid->path, qid->version);
+ qid->type, qid->path, qid->version);
free_and_error:
p9_tag_remove(clnt, req);
@@ -1418,10 +1408,10 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n",
- dfid->fid, oldfid->fid, newname);
+ dfid->fid, oldfid->fid, newname);
clnt = dfid->clnt;
req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid,
- newname);
+ newname);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1438,7 +1428,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
- fid->fid, datasync);
+ fid->fid, datasync);
err = 0;
clnt = fid->clnt;
@@ -1474,8 +1464,8 @@ int p9_client_clunk(struct p9_fid *fid)
return 0;
again:
- p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid,
- retries);
+ p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n",
+ fid->fid, retries);
err = 0;
clnt = fid->clnt;
@@ -1489,16 +1479,16 @@ again:
p9_tag_remove(clnt, req);
error:
- /*
- * Fid is not valid even after a failed clunk
+ /* Fid is not valid even after a failed clunk
* If interrupted, retry once then give up and
* leak fid until umount.
*/
if (err == -ERESTARTSYS) {
if (retries++ == 0)
goto again;
- } else
+ } else {
p9_fid_destroy(fid);
+ }
return err;
}
EXPORT_SYMBOL(p9_client_clunk);
@@ -1538,7 +1528,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags)
struct p9_client *clnt;
p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n",
- dfid->fid, name, flags);
+ dfid->fid, name, flags);
clnt = dfid->clnt;
req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags);
@@ -1584,8 +1574,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
char *dataptr;
*err = 0;
- p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
- fid->fid, (unsigned long long) offset, (int)iov_iter_count(to));
+ p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %zu\n",
+ fid->fid, offset, iov_iter_count(to));
rsize = fid->iounit;
if (!rsize || rsize > clnt->msize - P9_IOHDRSZ)
@@ -1651,13 +1641,13 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
*err = 0;
p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
- fid->fid, (unsigned long long) offset,
- iov_iter_count(from));
+ fid->fid, offset, iov_iter_count(from));
while (iov_iter_count(from)) {
int count = iov_iter_count(from);
int rsize = fid->iounit;
- if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
+
+ if (!rsize || rsize > clnt->msize - P9_IOHDRSZ)
rsize = clnt->msize - P9_IOHDRSZ;
if (count < rsize)
@@ -1670,7 +1660,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
fid->fid, offset, rsize);
} else {
req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
- offset, rsize, from);
+ offset, rsize, from);
}
if (IS_ERR(req)) {
*err = PTR_ERR(req);
@@ -1703,12 +1693,13 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
{
int err;
struct p9_client *clnt;
- struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL);
+ struct p9_wstat *ret;
struct p9_req_t *req;
u16 ignored;
p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);
+ ret = kmalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
@@ -1729,17 +1720,17 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
}
p9_debug(P9_DEBUG_9P,
- "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
- "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
- "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
- "<<< uid=%d gid=%d n_muid=%d\n",
- ret->size, ret->type, ret->dev, ret->qid.type,
- (unsigned long long)ret->qid.path, ret->qid.version, ret->mode,
- ret->atime, ret->mtime, (unsigned long long)ret->length,
- ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
- from_kuid(&init_user_ns, ret->n_uid),
- from_kgid(&init_user_ns, ret->n_gid),
- from_kuid(&init_user_ns, ret->n_muid));
+ "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+ "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+ "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+ "<<< uid=%d gid=%d n_muid=%d\n",
+ ret->size, ret->type, ret->dev, ret->qid.type, ret->qid.path,
+ ret->qid.version, ret->mode,
+ ret->atime, ret->mtime, ret->length,
+ ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
+ from_kuid(&init_user_ns, ret->n_uid),
+ from_kgid(&init_user_ns, ret->n_gid),
+ from_kuid(&init_user_ns, ret->n_muid));
p9_tag_remove(clnt, req);
return ret;
@@ -1751,17 +1742,17 @@ error:
EXPORT_SYMBOL(p9_client_stat);
struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
- u64 request_mask)
+ u64 request_mask)
{
int err;
struct p9_client *clnt;
- struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl),
- GFP_KERNEL);
+ struct p9_stat_dotl *ret;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n",
- fid->fid, request_mask);
+ fid->fid, request_mask);
+ ret = kmalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
@@ -1781,26 +1772,27 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
goto error;
}
- p9_debug(P9_DEBUG_9P,
- "<<< RGETATTR st_result_mask=%lld\n"
- "<<< qid=%x.%llx.%x\n"
- "<<< st_mode=%8.8x st_nlink=%llu\n"
- "<<< st_uid=%d st_gid=%d\n"
- "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n"
- "<<< st_atime_sec=%lld st_atime_nsec=%lld\n"
- "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n"
- "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n"
- "<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
- "<<< st_gen=%lld st_data_version=%lld\n",
- ret->st_result_mask, ret->qid.type, ret->qid.path,
- ret->qid.version, ret->st_mode, ret->st_nlink,
- from_kuid(&init_user_ns, ret->st_uid),
- from_kgid(&init_user_ns, ret->st_gid),
- ret->st_rdev, ret->st_size, ret->st_blksize,
- ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec,
- ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec,
- ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
- ret->st_gen, ret->st_data_version);
+ p9_debug(P9_DEBUG_9P, "<<< RGETATTR st_result_mask=%lld\n"
+ "<<< qid=%x.%llx.%x\n"
+ "<<< st_mode=%8.8x st_nlink=%llu\n"
+ "<<< st_uid=%d st_gid=%d\n"
+ "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n"
+ "<<< st_atime_sec=%lld st_atime_nsec=%lld\n"
+ "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n"
+ "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n"
+ "<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
+ "<<< st_gen=%lld st_data_version=%lld\n",
+ ret->st_result_mask,
+ ret->qid.type, ret->qid.path, ret->qid.version,
+ ret->st_mode, ret->st_nlink,
+ from_kuid(&init_user_ns, ret->st_uid),
+ from_kgid(&init_user_ns, ret->st_gid),
+ ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks,
+ ret->st_atime_sec, ret->st_atime_nsec,
+ ret->st_mtime_sec, ret->st_mtime_nsec,
+ ret->st_ctime_sec, ret->st_ctime_nsec,
+ ret->st_btime_sec, ret->st_btime_nsec,
+ ret->st_gen, ret->st_data_version);
p9_tag_remove(clnt, req);
return ret;
@@ -1819,7 +1811,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
/* size[2] type[2] dev[4] qid[13] */
/* mode[4] atime[4] mtime[4] length[8]*/
/* name[s] uid[s] gid[s] muid[s] */
- ret = 2+4+13+4+4+4+8+2+2+2+2;
+ ret = 2 + 4 + 13 + 4 + 4 + 4 + 8 + 2 + 2 + 2 + 2;
if (wst->name)
ret += strlen(wst->name);
@@ -1830,9 +1822,10 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
if (wst->muid)
ret += strlen(wst->muid);
- if ((proto_version == p9_proto_2000u) ||
- (proto_version == p9_proto_2000L)) {
- ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */
+ if (proto_version == p9_proto_2000u ||
+ proto_version == p9_proto_2000L) {
+ /* extension[s] n_uid[4] n_gid[4] n_muid[4] */
+ ret += 2 + 4 + 4 + 4;
if (wst->extension)
ret += strlen(wst->extension);
}
@@ -1849,21 +1842,23 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
err = 0;
clnt = fid->clnt;
wst->size = p9_client_statsize(wst, clnt->proto_version);
- p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
+ p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n",
+ fid->fid);
p9_debug(P9_DEBUG_9P,
- " sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
- " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
- " name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
- " uid=%d gid=%d n_muid=%d\n",
- wst->size, wst->type, wst->dev, wst->qid.type,
- (unsigned long long)wst->qid.path, wst->qid.version, wst->mode,
- wst->atime, wst->mtime, (unsigned long long)wst->length,
- wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
- from_kuid(&init_user_ns, wst->n_uid),
- from_kgid(&init_user_ns, wst->n_gid),
- from_kuid(&init_user_ns, wst->n_muid));
-
- req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst);
+ " sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+ " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+ " name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+ " uid=%d gid=%d n_muid=%d\n",
+ wst->size, wst->type, wst->dev, wst->qid.type,
+ wst->qid.path, wst->qid.version,
+ wst->mode, wst->atime, wst->mtime, wst->length,
+ wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
+ from_kuid(&init_user_ns, wst->n_uid),
+ from_kgid(&init_user_ns, wst->n_gid),
+ from_kuid(&init_user_ns, wst->n_muid));
+
+ req = p9_client_rpc(clnt, P9_TWSTAT, "dwS",
+ fid->fid, wst->size + 2, wst);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1886,15 +1881,15 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
- p9_debug(P9_DEBUG_9P,
- " valid=%x mode=%x uid=%d gid=%d size=%lld\n"
- " atime_sec=%lld atime_nsec=%lld\n"
- " mtime_sec=%lld mtime_nsec=%lld\n",
- p9attr->valid, p9attr->mode,
- from_kuid(&init_user_ns, p9attr->uid),
- from_kgid(&init_user_ns, p9attr->gid),
- p9attr->size, p9attr->atime_sec, p9attr->atime_nsec,
- p9attr->mtime_sec, p9attr->mtime_nsec);
+ p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n",
+ p9attr->valid, p9attr->mode,
+ from_kuid(&init_user_ns, p9attr->uid),
+ from_kgid(&init_user_ns, p9attr->gid),
+ p9attr->size);
+ p9_debug(P9_DEBUG_9P, " atime_sec=%lld atime_nsec=%lld\n",
+ p9attr->atime_sec, p9attr->atime_nsec);
+ p9_debug(P9_DEBUG_9P, " mtime_sec=%lld mtime_nsec=%lld\n",
+ p9attr->mtime_sec, p9attr->mtime_nsec);
req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr);
@@ -1935,12 +1930,10 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
goto error;
}
- p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld "
- "blocks %llu bfree %llu bavail %llu files %llu ffree %llu "
- "fsid %llu namelen %ld\n",
- fid->fid, (long unsigned int)sb->type, (long int)sb->bsize,
- sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree,
- sb->fsid, (long int)sb->namelen);
+ p9_debug(P9_DEBUG_9P,
+ "<<< RSTATFS fid %d type 0x%x bsize %u blocks %llu bfree %llu bavail %llu files %llu ffree %llu fsid %llu namelen %u\n",
+ fid->fid, sb->type, sb->bsize, sb->blocks, sb->bfree,
+ sb->bavail, sb->files, sb->ffree, sb->fsid, sb->namelen);
p9_tag_remove(clnt, req);
error:
@@ -1959,10 +1952,10 @@ int p9_client_rename(struct p9_fid *fid,
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
- fid->fid, newdirfid->fid, name);
+ fid->fid, newdirfid->fid, name);
req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid,
- newdirfid->fid, name);
+ newdirfid->fid, name);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1986,9 +1979,9 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
err = 0;
clnt = olddirfid->clnt;
- p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s"
- " newdirfid %d new name %s\n", olddirfid->fid, old_name,
- newdirfid->fid, new_name);
+ p9_debug(P9_DEBUG_9P,
+ ">>> TRENAMEAT olddirfid %d old name %s newdirfid %d new name %s\n",
+ olddirfid->fid, old_name, newdirfid->fid, new_name);
req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid,
old_name, newdirfid->fid, new_name);
@@ -1998,7 +1991,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
}
p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
- newdirfid->fid, new_name);
+ newdirfid->fid, new_name);
p9_tag_remove(clnt, req);
error:
@@ -2006,11 +1999,10 @@ error:
}
EXPORT_SYMBOL(p9_client_renameat);
-/*
- * An xattrwalk without @attr_name gives the fid for the lisxattr namespace
+/* An xattrwalk without @attr_name gives the fid for the lisxattr namespace
*/
struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
- const char *attr_name, u64 *attr_size)
+ const char *attr_name, u64 *attr_size)
{
int err;
struct p9_req_t *req;
@@ -2025,11 +2017,11 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
goto error;
}
p9_debug(P9_DEBUG_9P,
- ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",
- file_fid->fid, attr_fid->fid, attr_name);
+ ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",
+ file_fid->fid, attr_fid->fid, attr_name);
req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds",
- file_fid->fid, attr_fid->fid, attr_name);
+ file_fid->fid, attr_fid->fid, attr_name);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -2042,13 +2034,13 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
}
p9_tag_remove(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n",
- attr_fid->fid, *attr_size);
+ attr_fid->fid, *attr_size);
return attr_fid;
clunk_fid:
p9_client_clunk(attr_fid);
attr_fid = NULL;
error:
- if (attr_fid && (attr_fid != file_fid))
+ if (attr_fid && attr_fid != file_fid)
p9_fid_destroy(attr_fid);
return ERR_PTR(err);
@@ -2056,19 +2048,19 @@ error:
EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
- u64 attr_size, int flags)
+ u64 attr_size, int flags)
{
int err;
struct p9_req_t *req;
struct p9_client *clnt;
p9_debug(P9_DEBUG_9P,
- ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n",
- fid->fid, name, (long long)attr_size, flags);
+ ">>> TXATTRCREATE fid %d name %s size %llu flag %d\n",
+ fid->fid, name, attr_size, flags);
err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
- fid->fid, name, attr_size, flags);
+ fid->fid, name, attr_size, flags);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -2092,13 +2084,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
iov_iter_kvec(&to, READ, &kv, 1, count);
p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
- fid->fid, (unsigned long long) offset, count);
+ fid->fid, offset, count);
err = 0;
clnt = fid->clnt;
rsize = fid->iounit;
- if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ)
+ if (!rsize || rsize > clnt->msize - P9_READDIRHDRSZ)
rsize = clnt->msize - P9_READDIRHDRSZ;
if (count < rsize)
@@ -2106,8 +2098,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && rsize > 1024) {
- /*
- * response header len is 11
+ /* response header len is 11
* PDU Header(7) + IO Size (4)
*/
req = p9_client_zc_rpc(clnt, P9_TREADDIR, &to, NULL, rsize, 0,
@@ -2148,7 +2139,7 @@ error:
EXPORT_SYMBOL(p9_client_readdir);
int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
- dev_t rdev, kgid_t gid, struct p9_qid *qid)
+ dev_t rdev, kgid_t gid, struct p9_qid *qid)
{
int err;
struct p9_client *clnt;
@@ -2156,10 +2147,11 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
err = 0;
clnt = fid->clnt;
- p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d "
- "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
+ p9_debug(P9_DEBUG_9P,
+ ">>> TMKNOD fid %d name %s mode %d major %d minor %d\n",
+ fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode,
- MAJOR(rdev), MINOR(rdev), gid);
+ MAJOR(rdev), MINOR(rdev), gid);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -2168,18 +2160,17 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
- p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
- (unsigned long long)qid->path, qid->version);
+ p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n",
+ qid->type, qid->path, qid->version);
error:
p9_tag_remove(clnt, req);
return err;
-
}
EXPORT_SYMBOL(p9_client_mknod_dotl);
int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
- kgid_t gid, struct p9_qid *qid)
+ kgid_t gid, struct p9_qid *qid)
{
int err;
struct p9_client *clnt;
@@ -2189,8 +2180,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
fid->fid, name, mode, from_kgid(&init_user_ns, gid));
- req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode,
- gid);
+ req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg",
+ fid->fid, name, mode, gid);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -2200,12 +2191,11 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
- (unsigned long long)qid->path, qid->version);
+ qid->path, qid->version);
error:
p9_tag_remove(clnt, req);
return err;
-
}
EXPORT_SYMBOL(p9_client_mkdir_dotl);
@@ -2217,14 +2207,14 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
err = 0;
clnt = fid->clnt;
- p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "
- "start %lld length %lld proc_id %d client_id %s\n",
- fid->fid, flock->type, flock->flags, flock->start,
- flock->length, flock->proc_id, flock->client_id);
+ p9_debug(P9_DEBUG_9P,
+ ">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n",
+ fid->fid, flock->type, flock->flags, flock->start,
+ flock->length, flock->proc_id, flock->client_id);
req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type,
- flock->flags, flock->start, flock->length,
- flock->proc_id, flock->client_id);
+ flock->flags, flock->start, flock->length,
+ flock->proc_id, flock->client_id);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -2238,7 +2228,6 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
error:
p9_tag_remove(clnt, req);
return err;
-
}
EXPORT_SYMBOL(p9_client_lock_dotl);
@@ -2250,12 +2239,14 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
err = 0;
clnt = fid->clnt;
- p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "
- "length %lld proc_id %d client_id %s\n", fid->fid, glock->type,
- glock->start, glock->length, glock->proc_id, glock->client_id);
+ p9_debug(P9_DEBUG_9P,
+ ">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n",
+ fid->fid, glock->type, glock->start, glock->length,
+ glock->proc_id, glock->client_id);
- req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type,
- glock->start, glock->length, glock->proc_id, glock->client_id);
+ req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid,
+ glock->type, glock->start, glock->length,
+ glock->proc_id, glock->client_id);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -2267,9 +2258,10 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
- p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
- "proc_id %d client_id %s\n", glock->type, glock->start,
- glock->length, glock->proc_id, glock->client_id);
+ p9_debug(P9_DEBUG_9P,
+ "<<< RGETLOCK type %i start %lld length %lld proc_id %d client_id %s\n",
+ glock->type, glock->start, glock->length,
+ glock->proc_id, glock->client_id);
error:
p9_tag_remove(clnt, req);
return err;
diff --git a/net/9p/error.c b/net/9p/error.c
index 61c18daf3050..8da744494b68 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * linux/fs/9p/error.c
- *
* Error string handling
*
* Plan 9 uses error strings, Unix uses error numbers. These functions
@@ -185,7 +183,7 @@ int p9_error_init(void)
INIT_HLIST_HEAD(&hash_errmap[bucket]);
/* load initial error map into hash table */
- for (c = errmap; c->name != NULL; c++) {
+ for (c = errmap; c->name; c++) {
c->namelen = strlen(c->name);
bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
INIT_HLIST_NODE(&c->list);
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 5126566850bd..c37fc201a944 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * net/9p/9p.c
- *
* 9P entry point
*
* Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
@@ -12,6 +10,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/kmod.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
@@ -24,13 +23,13 @@
#include <linux/spinlock.h>
#ifdef CONFIG_NET_9P_DEBUG
-unsigned int p9_debug_level = 0; /* feature-rific global debug level */
+unsigned int p9_debug_level; /* feature-rific global debug level */
EXPORT_SYMBOL(p9_debug_level);
module_param_named(debug, p9_debug_level, uint, 0);
MODULE_PARM_DESC(debug, "9P debugging level");
void _p9_debug(enum p9_debug_flags level, const char *func,
- const char *fmt, ...)
+ const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -53,10 +52,7 @@ void _p9_debug(enum p9_debug_flags level, const char *func,
EXPORT_SYMBOL(_p9_debug);
#endif
-/*
- * Dynamic Transport Registration Routines
- *
- */
+/* Dynamic Transport Registration Routines */
static DEFINE_SPINLOCK(v9fs_trans_lock);
static LIST_HEAD(v9fs_trans_list);
@@ -87,12 +83,7 @@ void v9fs_unregister_trans(struct p9_trans_module *m)
}
EXPORT_SYMBOL(v9fs_unregister_trans);
-/**
- * v9fs_get_trans_by_name - get transport with the matching name
- * @s: string identifying transport
- *
- */
-struct p9_trans_module *v9fs_get_trans_by_name(char *s)
+static struct p9_trans_module *_p9_get_trans_by_name(char *s)
{
struct p9_trans_module *t, *found = NULL;
@@ -106,6 +97,28 @@ struct p9_trans_module *v9fs_get_trans_by_name(char *s)
}
spin_unlock(&v9fs_trans_lock);
+
+ return found;
+}
+
+/**
+ * v9fs_get_trans_by_name - get transport with the matching name
+ * @s: string identifying transport
+ *
+ */
+struct p9_trans_module *v9fs_get_trans_by_name(char *s)
+{
+ struct p9_trans_module *found = NULL;
+
+ found = _p9_get_trans_by_name(s);
+
+#ifdef CONFIG_MODULES
+ if (!found) {
+ request_module("9p-%s", s);
+ found = _p9_get_trans_by_name(s);
+ }
+#endif
+
return found;
}
EXPORT_SYMBOL(v9fs_get_trans_by_name);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 03593eb240d8..3754c33e2974 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * net/9p/protocol.c
- *
* 9P Protocol Support Code
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
@@ -46,6 +44,7 @@ EXPORT_SYMBOL(p9stat_free);
size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
{
size_t len = min(pdu->size - pdu->offset, size);
+
memcpy(data, &pdu->sdata[pdu->offset], len);
pdu->offset += len;
return size - len;
@@ -54,6 +53,7 @@ size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
{
size_t len = min(pdu->capacity - pdu->size, size);
+
memcpy(&pdu->sdata[pdu->size], data, len);
pdu->size += len;
return size - len;
@@ -64,6 +64,7 @@ pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size)
{
size_t len = min(pdu->capacity - pdu->size, size);
struct iov_iter i = *from;
+
if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, &i))
len = 0;
@@ -71,26 +72,25 @@ pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size)
return size - len;
}
-/*
- b - int8_t
- w - int16_t
- d - int32_t
- q - int64_t
- s - string
- u - numeric uid
- g - numeric gid
- S - stat
- Q - qid
- D - data blob (int32_t size followed by void *, results are not freed)
- T - array of strings (int16_t count, followed by strings)
- R - array of qids (int16_t count, followed by qids)
- A - stat for 9p2000.L (p9_stat_dotl)
- ? - if optional = 1, continue parsing
-*/
+/* b - int8_t
+ * w - int16_t
+ * d - int32_t
+ * q - int64_t
+ * s - string
+ * u - numeric uid
+ * g - numeric gid
+ * S - stat
+ * Q - qid
+ * D - data blob (int32_t size followed by void *, results are not freed)
+ * T - array of strings (int16_t count, followed by strings)
+ * R - array of qids (int16_t count, followed by qids)
+ * A - stat for 9p2000.L (p9_stat_dotl)
+ * ? - if optional = 1, continue parsing
+ */
static int
p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
- va_list ap)
+ va_list ap)
{
const char *ptr;
int errcode = 0;
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index 6835f91cfda5..6d719c30331a 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * net/9p/protocol.h
- *
* 9P Protocol Support Code
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
@@ -11,7 +9,7 @@
*/
int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
- va_list ap);
+ va_list ap);
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu);
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index 6ea5ea548cd4..c827f694551c 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright IBM Corporation, 2010
* Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
*/
#include <linux/mm.h>
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
index c43babb3f635..32134db6abf3 100644
--- a/net/9p/trans_common.h
+++ b/net/9p/trans_common.h
@@ -1,15 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
/*
* Copyright IBM Corporation, 2010
* Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
*/
-void p9_release_pages(struct page **, int);
+void p9_release_pages(struct page **pages, int nr_pages);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 007bbcc68010..827c47620fc0 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * linux/fs/9p/trans_fd.c
- *
* Fd transport layer. Includes deprecated socket layer.
*
* Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index af0a8a6cd3fd..88e563826674 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * linux/fs/9p/trans_rdma.c
- *
* RDMA transport layer based on the trans_fd.c implementation.
*
* Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
@@ -767,6 +765,7 @@ static void __exit p9_trans_rdma_exit(void)
module_init(p9_trans_rdma_init);
module_exit(p9_trans_rdma_exit);
+MODULE_ALIAS_9P("rdma");
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("RDMA Transport for 9P");
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 490a4c900339..bd5a89c4960d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -794,6 +794,7 @@ static void __exit p9_virtio_cleanup(void)
module_init(p9_virtio_init);
module_exit(p9_virtio_cleanup);
+MODULE_ALIAS_9P("virtio");
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 3ec1a51a6944..2418fa0b58f3 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -1,33 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/9p/trans_xen
*
* Xen transport layer.
*
* Copyright (C) 2017 by Stefano Stabellini <stefano@aporeto.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
*/
#include <xen/events.h>
@@ -552,6 +529,7 @@ static int p9_trans_xen_init(void)
return rc;
}
module_init(p9_trans_xen_init);
+MODULE_ALIAS_9P("xen");
static void p9_trans_xen_exit(void)
{
diff --git a/net/Kconfig b/net/Kconfig
index fb13460c6dab..074472dfa94a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID
config NET_RX_BUSY_POLL
bool
- default y
+ default y if !PREEMPT_RT
config BQL
bool
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index dd2a8dabed84..f666f2f98ba5 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -577,10 +577,12 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
pr_debug("vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, brvcc);
if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) {
unsigned char *esi = atmvcc->dev->esi;
+ const u8 one = 1;
+
if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5])
- memcpy(net_dev->dev_addr, esi, net_dev->addr_len);
+ dev_addr_set(net_dev, esi);
else
- net_dev->dev_addr[2] = 1;
+ dev_addr_mod(net_dev, 2, &one, 1);
}
list_add(&brvcc->brvccs, &brdev->brvccs);
write_unlock_irq(&devs_lock);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 7226c784dbe0..6257bf12e5a0 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -340,12 +340,12 @@ static int lec_close(struct net_device *dev)
static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
{
+ static const u8 zero_addr[ETH_ALEN] = {};
unsigned long flags;
struct net_device *dev = (struct net_device *)vcc->proto_data;
struct lec_priv *priv = netdev_priv(dev);
struct atmlec_msg *mesg;
struct lec_arp_table *entry;
- int i;
char *tmp; /* FIXME */
WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
@@ -355,12 +355,10 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type);
switch (mesg->type) {
case l_set_mac_addr:
- for (i = 0; i < 6; i++)
- dev->dev_addr[i] = mesg->content.normal.mac_addr[i];
+ eth_hw_addr_set(dev, mesg->content.normal.mac_addr);
break;
case l_del_mac_addr:
- for (i = 0; i < 6; i++)
- dev->dev_addr[i] = 0;
+ eth_hw_addr_set(dev, zero_addr);
break;
case l_addr_delete:
lec_addr_delete(priv, mesg->content.normal.atm_addr,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 2631efc6e359..2f34bbdde0e8 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -202,7 +202,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr,
* Find an AX.25 control block given both ends. It will only pick up
* floating AX.25 control blocks or non Raw socket bound control blocks.
*/
-ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr,
+ax25_cb *ax25_find_cb(const ax25_address *src_addr, ax25_address *dest_addr,
ax25_digi *digi, struct net_device *dev)
{
ax25_cb *s;
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 4ac2e0847652..d0a043a51848 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -35,7 +35,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
spin_lock_bh(&ax25_dev_lock);
for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
- if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) {
+ if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
}
spin_unlock_bh(&ax25_dev_lock);
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index b4083f30af0d..979bc4b828a0 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -98,7 +98,7 @@ void ax25_linkfail_release(struct ax25_linkfail *lf)
EXPORT_SYMBOL(ax25_linkfail_release);
-int ax25_listen_register(ax25_address *callsign, struct net_device *dev)
+int ax25_listen_register(const ax25_address *callsign, struct net_device *dev)
{
struct listen_struct *listen;
@@ -121,7 +121,7 @@ int ax25_listen_register(ax25_address *callsign, struct net_device *dev)
EXPORT_SYMBOL(ax25_listen_register);
-void ax25_listen_release(ax25_address *callsign, struct net_device *dev)
+void ax25_listen_release(const ax25_address *callsign, struct net_device *dev)
{
struct listen_struct *s, *listen;
@@ -171,7 +171,7 @@ int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *)
return res;
}
-int ax25_listen_mine(ax25_address *callsign, struct net_device *dev)
+int ax25_listen_mine(const ax25_address *callsign, struct net_device *dev)
{
struct listen_struct *listen;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index cd6afe895db9..1cac25aca637 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -181,7 +181,7 @@ static int ax25_process_rx_frame(ax25_cb *ax25, struct sk_buff *skb, int type, i
}
static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
- ax25_address *dev_addr, struct packet_type *ptype)
+ const ax25_address *dev_addr, struct packet_type *ptype)
{
ax25_address src, dest, *next_digi = NULL;
int type = 0, mine = 0, dama;
@@ -447,5 +447,5 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
skb_pull(skb, AX25_KISS_HEADER_LEN); /* Remove the KISS byte */
- return ax25_rcv(skb, dev, (ax25_address *)dev->dev_addr, ptype);
+ return ax25_rcv(skb, dev, (const ax25_address *)dev->dev_addr, ptype);
}
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 22f2f66c6e0a..3db76d2470e9 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -29,7 +29,7 @@
static DEFINE_SPINLOCK(ax25_frag_lock);
-ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev)
+ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev)
{
ax25_dev *ax25_dev;
ax25_cb *ax25;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 1669744304c5..2ed9496fc41f 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -254,7 +254,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv,
* Return: backbone gateway if found or NULL otherwise
*/
static struct batadv_bla_backbone_gw *
-batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
+batadv_backbone_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
@@ -336,7 +336,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
* @vid: the VLAN ID
* @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...)
*/
-static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
+static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac,
unsigned short vid, int claimtype)
{
struct sk_buff *skb;
@@ -488,7 +488,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work)
* Return: the (possibly created) backbone gateway or NULL on error
*/
static struct batadv_bla_backbone_gw *
-batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
+batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig,
unsigned short vid, bool own_backbone)
{
struct batadv_bla_backbone_gw *entry;
@@ -926,7 +926,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv,
*/
static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
- u8 *backbone_addr, u8 *claim_addr,
+ const u8 *backbone_addr, const u8 *claim_addr,
unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -964,7 +964,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
*/
static bool batadv_handle_claim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
- u8 *backbone_addr, u8 *claim_addr,
+ const u8 *backbone_addr, const u8 *claim_addr,
unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -1560,10 +1560,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
return 0;
bat_priv->bla.claim_hash = batadv_hash_new(128);
- bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.claim_hash)
+ return -ENOMEM;
- if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash)
+ bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.backbone_hash) {
+ batadv_hash_destroy(bat_priv->bla.claim_hash);
return -ENOMEM;
+ }
batadv_hash_set_lock_class(bat_priv->bla.claim_hash,
&batadv_claim_hash_lock_class_key);
@@ -2126,7 +2130,7 @@ batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid,
struct batadv_hard_iface *primary_if,
struct batadv_bla_claim *claim)
{
- u8 *primary_addr = primary_if->net_dev->dev_addr;
+ const u8 *primary_addr = primary_if->net_dev->dev_addr;
u16 backbone_crc;
bool is_own;
void *hdr;
@@ -2294,7 +2298,7 @@ batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid,
struct batadv_hard_iface *primary_if,
struct batadv_bla_backbone_gw *backbone_gw)
{
- u8 *primary_addr = primary_if->net_dev->dev_addr;
+ const u8 *primary_addr = primary_if->net_dev->dev_addr;
u16 backbone_crc;
bool is_own;
int msecs;
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 3ddd66e4c29e..5207cd8d6ad8 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -190,29 +190,41 @@ int batadv_mesh_init(struct net_device *soft_iface)
bat_priv->gw.generation = 0;
- ret = batadv_v_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
-
ret = batadv_originator_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_orig;
+ }
ret = batadv_tt_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_tt;
+ }
+
+ ret = batadv_v_mesh_init(bat_priv);
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_v;
+ }
ret = batadv_bla_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_bla;
+ }
ret = batadv_dat_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_dat;
+ }
ret = batadv_nc_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_nc;
+ }
batadv_gw_init(bat_priv);
batadv_mcast_init(bat_priv);
@@ -222,8 +234,20 @@ int batadv_mesh_init(struct net_device *soft_iface)
return 0;
-err:
- batadv_mesh_free(soft_iface);
+err_nc:
+ batadv_dat_free(bat_priv);
+err_dat:
+ batadv_bla_free(bat_priv);
+err_bla:
+ batadv_v_mesh_free(bat_priv);
+err_v:
+ batadv_tt_free(bat_priv);
+err_tt:
+ batadv_originator_free(bat_priv);
+err_orig:
+ batadv_purge_outstanding_packets(bat_priv, NULL);
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
+
return ret;
}
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index a3b6658ed789..433901dcf0c3 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -89,7 +89,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
rcu_read_lock();
do {
upper = netdev_master_upper_dev_get_rcu(upper);
- } while (upper && !(upper->priv_flags & IFF_EBRIDGE));
+ } while (upper && !netif_is_bridge_master(upper));
dev_hold(upper);
rcu_read_unlock();
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 9f06132e007d..0a7f1d36a6a8 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -152,8 +152,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
&batadv_nc_coding_hash_lock_class_key);
bat_priv->nc.decoding_hash = batadv_hash_new(128);
- if (!bat_priv->nc.decoding_hash)
+ if (!bat_priv->nc.decoding_hash) {
+ batadv_hash_destroy(bat_priv->nc.coding_hash);
goto err;
+ }
batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
&batadv_nc_decoding_hash_lock_class_key);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 970d0d7ccc98..83f31494ea4d 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -747,7 +747,8 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_orig_node *orig_node = NULL;
struct batadv_hard_iface *primary_if = NULL;
bool ret = false;
- u8 *orig_addr, orig_ttvn;
+ const u8 *orig_addr;
+ u8 orig_ttvn;
if (batadv_is_my_client(bat_priv, dst_addr, vid)) {
primary_if = batadv_primary_if_get_selected(bat_priv);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0604b0279573..7ee09337fc40 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -134,7 +134,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
return -EADDRNOTAVAIL;
ether_addr_copy(old_addr, dev->dev_addr);
- ether_addr_copy(dev->dev_addr, addr->sa_data);
+ eth_hw_addr_set(dev, addr->sa_data);
/* only modify transtable if it has been initialized before */
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 56b9fe97b3b4..93730d30af54 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -631,9 +631,9 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node = NULL;
const struct batadv_icmp_tp_packet *icmp;
struct batadv_tp_vars *tp_vars;
+ const unsigned char *dev_addr;
size_t packet_len, mss;
u32 rtt, recv_ack, cwnd;
- unsigned char *dev_addr;
packet_len = BATADV_TP_PLEN;
mss = BATADV_TP_PLEN;
@@ -890,7 +890,7 @@ out:
batadv_tp_vars_put(tp_vars);
- do_exit(0);
+ return 0;
}
/**
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index e0b3dace2020..4b7ad6684bc4 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -4162,8 +4162,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
return ret;
ret = batadv_tt_global_init(bat_priv);
- if (ret < 0)
+ if (ret < 0) {
+ batadv_tt_local_table_free(bat_priv);
return ret;
+ }
batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
batadv_tt_tvlv_unicast_handler_v1,
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 992773376e51..0cb58eb04093 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -587,8 +587,8 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*/
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
- u8 *dst, u8 type, u8 version,
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src,
+ const u8 *dst, u8 type, u8 version,
void *tvlv_value, u16 tvlv_value_len)
{
struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index 54f2a35653d0..4cf8af00fc11 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -42,8 +42,8 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
u8 *src, u8 *dst,
void *tvlv_buff, u16 tvlv_buff_len);
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
- u8 *dst, u8 type, u8 version,
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src,
+ const u8 *dst, u8 type, u8 version,
void *tvlv_value, u16 tvlv_value_len);
#endif /* _NET_BATMAN_ADV_TVLV_H_ */
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index fd164a248569..133d7ea063fb 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -663,6 +663,7 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev)
{
struct net_device *netdev;
+ bdaddr_t addr;
int err;
netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)),
@@ -672,7 +673,8 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev)
return -ENOMEM;
netdev->addr_assign_type = NET_ADDR_PERM;
- baswap((void *)netdev->dev_addr, &chan->src);
+ baswap(&addr, &chan->src);
+ __dev_addr_set(netdev, &addr, sizeof(addr));
netdev->netdev_ops = &netdev_ops;
SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev);
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index cc0995301f93..291770fc9551 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -14,7 +14,8 @@ bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \
- ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o
+ ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o hci_codec.o \
+ eir.o
bluetooth-$(CONFIG_BT_BREDR) += sco.o
bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 72f47b372705..c9add7753b9f 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -594,7 +594,7 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
* ie. eh.h_dest is our local address. */
memcpy(s->eh.h_dest, &src, ETH_ALEN);
memcpy(s->eh.h_source, &dst, ETH_ALEN);
- memcpy(dev->dev_addr, s->eh.h_dest, ETH_ALEN);
+ eth_hw_addr_set(dev, s->eh.h_dest);
s->dev = dev;
s->sock = sock;
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
new file mode 100644
index 000000000000..7e930f77ecab
--- /dev/null
+++ b/net/bluetooth/eir.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * BlueZ - Bluetooth protocol stack for Linux
+ *
+ * Copyright (C) 2021 Intel Corporation
+ */
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/mgmt.h>
+
+#include "eir.h"
+
+#define PNP_INFO_SVCLASS_ID 0x1200
+
+u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
+{
+ size_t short_len;
+ size_t complete_len;
+
+ /* no space left for name (+ NULL + type + len) */
+ if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
+ return ad_len;
+
+ /* use complete name if present and fits */
+ complete_len = strlen(hdev->dev_name);
+ if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH)
+ return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE,
+ hdev->dev_name, complete_len + 1);
+
+ /* use short name if present */
+ short_len = strlen(hdev->short_name);
+ if (short_len)
+ return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
+ hdev->short_name, short_len + 1);
+
+ /* use shortened full name if present, we already know that name
+ * is longer then HCI_MAX_SHORT_NAME_LENGTH
+ */
+ if (complete_len) {
+ u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1];
+
+ memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH);
+ name[HCI_MAX_SHORT_NAME_LENGTH] = '\0';
+
+ return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name,
+ sizeof(name));
+ }
+
+ return ad_len;
+}
+
+u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
+{
+ return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance);
+}
+
+static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
+{
+ u8 *ptr = data, *uuids_start = NULL;
+ struct bt_uuid *uuid;
+
+ if (len < 4)
+ return ptr;
+
+ list_for_each_entry(uuid, &hdev->uuids, list) {
+ u16 uuid16;
+
+ if (uuid->size != 16)
+ continue;
+
+ uuid16 = get_unaligned_le16(&uuid->uuid[12]);
+ if (uuid16 < 0x1100)
+ continue;
+
+ if (uuid16 == PNP_INFO_SVCLASS_ID)
+ continue;
+
+ if (!uuids_start) {
+ uuids_start = ptr;
+ uuids_start[0] = 1;
+ uuids_start[1] = EIR_UUID16_ALL;
+ ptr += 2;
+ }
+
+ /* Stop if not enough space to put next UUID */
+ if ((ptr - data) + sizeof(u16) > len) {
+ uuids_start[1] = EIR_UUID16_SOME;
+ break;
+ }
+
+ *ptr++ = (uuid16 & 0x00ff);
+ *ptr++ = (uuid16 & 0xff00) >> 8;
+ uuids_start[0] += sizeof(uuid16);
+ }
+
+ return ptr;
+}
+
+static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
+{
+ u8 *ptr = data, *uuids_start = NULL;
+ struct bt_uuid *uuid;
+
+ if (len < 6)
+ return ptr;
+
+ list_for_each_entry(uuid, &hdev->uuids, list) {
+ if (uuid->size != 32)
+ continue;
+
+ if (!uuids_start) {
+ uuids_start = ptr;
+ uuids_start[0] = 1;
+ uuids_start[1] = EIR_UUID32_ALL;
+ ptr += 2;
+ }
+
+ /* Stop if not enough space to put next UUID */
+ if ((ptr - data) + sizeof(u32) > len) {
+ uuids_start[1] = EIR_UUID32_SOME;
+ break;
+ }
+
+ memcpy(ptr, &uuid->uuid[12], sizeof(u32));
+ ptr += sizeof(u32);
+ uuids_start[0] += sizeof(u32);
+ }
+
+ return ptr;
+}
+
+static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
+{
+ u8 *ptr = data, *uuids_start = NULL;
+ struct bt_uuid *uuid;
+
+ if (len < 18)
+ return ptr;
+
+ list_for_each_entry(uuid, &hdev->uuids, list) {
+ if (uuid->size != 128)
+ continue;
+
+ if (!uuids_start) {
+ uuids_start = ptr;
+ uuids_start[0] = 1;
+ uuids_start[1] = EIR_UUID128_ALL;
+ ptr += 2;
+ }
+
+ /* Stop if not enough space to put next UUID */
+ if ((ptr - data) + 16 > len) {
+ uuids_start[1] = EIR_UUID128_SOME;
+ break;
+ }
+
+ memcpy(ptr, uuid->uuid, 16);
+ ptr += 16;
+ uuids_start[0] += 16;
+ }
+
+ return ptr;
+}
+
+void eir_create(struct hci_dev *hdev, u8 *data)
+{
+ u8 *ptr = data;
+ size_t name_len;
+
+ name_len = strlen(hdev->dev_name);
+
+ if (name_len > 0) {
+ /* EIR Data type */
+ if (name_len > 48) {
+ name_len = 48;
+ ptr[1] = EIR_NAME_SHORT;
+ } else {
+ ptr[1] = EIR_NAME_COMPLETE;
+ }
+
+ /* EIR Data length */
+ ptr[0] = name_len + 1;
+
+ memcpy(ptr + 2, hdev->dev_name, name_len);
+
+ ptr += (name_len + 2);
+ }
+
+ if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) {
+ ptr[0] = 2;
+ ptr[1] = EIR_TX_POWER;
+ ptr[2] = (u8)hdev->inq_tx_power;
+
+ ptr += 3;
+ }
+
+ if (hdev->devid_source > 0) {
+ ptr[0] = 9;
+ ptr[1] = EIR_DEVICE_ID;
+
+ put_unaligned_le16(hdev->devid_source, ptr + 2);
+ put_unaligned_le16(hdev->devid_vendor, ptr + 4);
+ put_unaligned_le16(hdev->devid_product, ptr + 6);
+ put_unaligned_le16(hdev->devid_version, ptr + 8);
+
+ ptr += 10;
+ }
+
+ ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
+ ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
+ ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
+}
+
+u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
+{
+ struct adv_info *adv = NULL;
+ u8 ad_len = 0, flags = 0;
+ u32 instance_flags;
+
+ /* Return 0 when the current instance identifier is invalid. */
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv)
+ return 0;
+ }
+
+ instance_flags = hci_adv_instance_flags(hdev, instance);
+
+ /* If instance already has the flags set skip adding it once
+ * again.
+ */
+ if (adv && eir_get_data(adv->adv_data, adv->adv_data_len, EIR_FLAGS,
+ NULL))
+ goto skip_flags;
+
+ /* The Add Advertising command allows userspace to set both the general
+ * and limited discoverable flags.
+ */
+ if (instance_flags & MGMT_ADV_FLAG_DISCOV)
+ flags |= LE_AD_GENERAL;
+
+ if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV)
+ flags |= LE_AD_LIMITED;
+
+ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
+ flags |= LE_AD_NO_BREDR;
+
+ if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) {
+ /* If a discovery flag wasn't provided, simply use the global
+ * settings.
+ */
+ if (!flags)
+ flags |= mgmt_get_adv_discov_flags(hdev);
+
+ /* If flags would still be empty, then there is no need to
+ * include the "Flags" AD field".
+ */
+ if (flags) {
+ ptr[0] = 0x02;
+ ptr[1] = EIR_FLAGS;
+ ptr[2] = flags;
+
+ ad_len += 3;
+ ptr += 3;
+ }
+ }
+
+skip_flags:
+ if (adv) {
+ memcpy(ptr, adv->adv_data, adv->adv_data_len);
+ ad_len += adv->adv_data_len;
+ ptr += adv->adv_data_len;
+ }
+
+ if (instance_flags & MGMT_ADV_FLAG_TX_POWER) {
+ s8 adv_tx_power;
+
+ if (ext_adv_capable(hdev)) {
+ if (adv)
+ adv_tx_power = adv->tx_power;
+ else
+ adv_tx_power = hdev->adv_tx_power;
+ } else {
+ adv_tx_power = hdev->adv_tx_power;
+ }
+
+ /* Provide Tx Power only if we can provide a valid value for it */
+ if (adv_tx_power != HCI_TX_POWER_INVALID) {
+ ptr[0] = 0x02;
+ ptr[1] = EIR_TX_POWER;
+ ptr[2] = (u8)adv_tx_power;
+
+ ad_len += 3;
+ ptr += 3;
+ }
+ }
+
+ return ad_len;
+}
+
+static u8 create_default_scan_rsp(struct hci_dev *hdev, u8 *ptr)
+{
+ u8 scan_rsp_len = 0;
+
+ if (hdev->appearance)
+ scan_rsp_len = eir_append_appearance(hdev, ptr, scan_rsp_len);
+
+ return eir_append_local_name(hdev, ptr, scan_rsp_len);
+}
+
+u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr)
+{
+ struct adv_info *adv;
+ u8 scan_rsp_len = 0;
+
+ if (!instance)
+ return create_default_scan_rsp(hdev, ptr);
+
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv)
+ return 0;
+
+ if ((adv->flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance)
+ scan_rsp_len = eir_append_appearance(hdev, ptr, scan_rsp_len);
+
+ memcpy(&ptr[scan_rsp_len], adv->scan_rsp_data, adv->scan_rsp_len);
+
+ scan_rsp_len += adv->scan_rsp_len;
+
+ if (adv->flags & MGMT_ADV_FLAG_LOCAL_NAME)
+ scan_rsp_len = eir_append_local_name(hdev, ptr, scan_rsp_len);
+
+ return scan_rsp_len;
+}
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
new file mode 100644
index 000000000000..724662f8f8b1
--- /dev/null
+++ b/net/bluetooth/eir.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BlueZ - Bluetooth protocol stack for Linux
+ *
+ * Copyright (C) 2021 Intel Corporation
+ */
+
+void eir_create(struct hci_dev *hdev, u8 *data);
+
+u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
+u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
+
+u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len);
+u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
+
+static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
+ u8 *data, u8 data_len)
+{
+ eir[eir_len++] = sizeof(type) + data_len;
+ eir[eir_len++] = type;
+ memcpy(&eir[eir_len], data, data_len);
+ eir_len += data_len;
+
+ return eir_len;
+}
+
+static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
+{
+ eir[eir_len++] = sizeof(type) + sizeof(data);
+ eir[eir_len++] = type;
+ put_unaligned_le16(data, &eir[eir_len]);
+ eir_len += sizeof(data);
+
+ return eir_len;
+}
+
+static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
+ size_t *data_len)
+{
+ size_t parsed = 0;
+
+ if (eir_len < 2)
+ return NULL;
+
+ while (parsed < eir_len - 1) {
+ u8 field_len = eir[0];
+
+ if (field_len == 0)
+ break;
+
+ parsed += field_len + 1;
+
+ if (parsed > eir_len)
+ break;
+
+ if (eir[1] != type) {
+ eir += field_len + 1;
+ continue;
+ }
+
+ /* Zero length data */
+ if (field_len == 1)
+ return NULL;
+
+ if (data_len)
+ *data_len = field_len - 1;
+
+ return &eir[2];
+ }
+
+ return NULL;
+}
diff --git a/net/bluetooth/hci_codec.c b/net/bluetooth/hci_codec.c
new file mode 100644
index 000000000000..f0421d0edaa3
--- /dev/null
+++ b/net/bluetooth/hci_codec.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (C) 2021 Intel Corporation */
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include "hci_codec.h"
+
+static int hci_codec_list_add(struct list_head *list,
+ struct hci_op_read_local_codec_caps *sent,
+ struct hci_rp_read_local_codec_caps *rp,
+ void *caps,
+ __u32 len)
+{
+ struct codec_list *entry;
+
+ entry = kzalloc(sizeof(*entry) + len, GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->id = sent->id;
+ if (sent->id == 0xFF) {
+ entry->cid = __le16_to_cpu(sent->cid);
+ entry->vid = __le16_to_cpu(sent->vid);
+ }
+ entry->transport = sent->transport;
+ entry->len = len;
+ entry->num_caps = rp->num_caps;
+ if (rp->num_caps)
+ memcpy(entry->caps, caps, len);
+ list_add(&entry->list, list);
+
+ return 0;
+}
+
+void hci_codec_list_clear(struct list_head *codec_list)
+{
+ struct codec_list *c, *n;
+
+ list_for_each_entry_safe(c, n, codec_list, list) {
+ list_del(&c->list);
+ kfree(c);
+ }
+}
+
+static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport,
+ struct hci_op_read_local_codec_caps
+ *cmd)
+{
+ __u8 i;
+
+ for (i = 0; i < TRANSPORT_TYPE_MAX; i++) {
+ if (transport & BIT(i)) {
+ struct hci_rp_read_local_codec_caps *rp;
+ struct hci_codec_caps *caps;
+ struct sk_buff *skb;
+ __u8 j;
+ __u32 len;
+
+ cmd->transport = i;
+ skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS,
+ sizeof(*cmd), cmd,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Failed to read codec capabilities (%ld)",
+ PTR_ERR(skb));
+ continue;
+ }
+
+ if (skb->len < sizeof(*rp))
+ goto error;
+
+ rp = (void *)skb->data;
+
+ if (rp->status)
+ goto error;
+
+ if (!rp->num_caps) {
+ len = 0;
+ /* this codec doesn't have capabilities */
+ goto skip_caps_parse;
+ }
+
+ skb_pull(skb, sizeof(*rp));
+
+ for (j = 0, len = 0; j < rp->num_caps; j++) {
+ caps = (void *)skb->data;
+ if (skb->len < sizeof(*caps))
+ goto error;
+ if (skb->len < caps->len)
+ goto error;
+ len += sizeof(caps->len) + caps->len;
+ skb_pull(skb, sizeof(caps->len) + caps->len);
+ }
+
+skip_caps_parse:
+ hci_dev_lock(hdev);
+ hci_codec_list_add(&hdev->local_codecs, cmd, rp,
+ (__u8 *)rp + sizeof(*rp), len);
+ hci_dev_unlock(hdev);
+error:
+ kfree_skb(skb);
+ }
+ }
+}
+
+void hci_read_supported_codecs(struct hci_dev *hdev)
+{
+ struct sk_buff *skb;
+ struct hci_rp_read_local_supported_codecs *rp;
+ struct hci_std_codecs *std_codecs;
+ struct hci_vnd_codecs *vnd_codecs;
+ struct hci_op_read_local_codec_caps caps;
+ __u8 i;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
+ HCI_CMD_TIMEOUT);
+
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Failed to read local supported codecs (%ld)",
+ PTR_ERR(skb));
+ return;
+ }
+
+ if (skb->len < sizeof(*rp))
+ goto error;
+
+ rp = (void *)skb->data;
+
+ if (rp->status)
+ goto error;
+
+ skb_pull(skb, sizeof(rp->status));
+
+ std_codecs = (void *)skb->data;
+
+ /* validate codecs length before accessing */
+ if (skb->len < flex_array_size(std_codecs, codec, std_codecs->num)
+ + sizeof(std_codecs->num))
+ goto error;
+
+ /* enumerate codec capabilities of standard codecs */
+ memset(&caps, 0, sizeof(caps));
+ for (i = 0; i < std_codecs->num; i++) {
+ caps.id = std_codecs->codec[i];
+ caps.direction = 0x00;
+ hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps);
+ }
+
+ skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num)
+ + sizeof(std_codecs->num));
+
+ vnd_codecs = (void *)skb->data;
+
+ /* validate vendor codecs length before accessing */
+ if (skb->len <
+ flex_array_size(vnd_codecs, codec, vnd_codecs->num)
+ + sizeof(vnd_codecs->num))
+ goto error;
+
+ /* enumerate vendor codec capabilities */
+ for (i = 0; i < vnd_codecs->num; i++) {
+ caps.id = 0xFF;
+ caps.cid = vnd_codecs->codec[i].cid;
+ caps.vid = vnd_codecs->codec[i].vid;
+ caps.direction = 0x00;
+ hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps);
+ }
+
+error:
+ kfree_skb(skb);
+}
+
+void hci_read_supported_codecs_v2(struct hci_dev *hdev)
+{
+ struct sk_buff *skb;
+ struct hci_rp_read_local_supported_codecs_v2 *rp;
+ struct hci_std_codecs_v2 *std_codecs;
+ struct hci_vnd_codecs_v2 *vnd_codecs;
+ struct hci_op_read_local_codec_caps caps;
+ __u8 i;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL,
+ HCI_CMD_TIMEOUT);
+
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Failed to read local supported codecs (%ld)",
+ PTR_ERR(skb));
+ return;
+ }
+
+ if (skb->len < sizeof(*rp))
+ goto error;
+
+ rp = (void *)skb->data;
+
+ if (rp->status)
+ goto error;
+
+ skb_pull(skb, sizeof(rp->status));
+
+ std_codecs = (void *)skb->data;
+
+ /* check for payload data length before accessing */
+ if (skb->len < flex_array_size(std_codecs, codec, std_codecs->num)
+ + sizeof(std_codecs->num))
+ goto error;
+
+ memset(&caps, 0, sizeof(caps));
+
+ for (i = 0; i < std_codecs->num; i++) {
+ caps.id = std_codecs->codec[i].id;
+ hci_read_codec_capabilities(hdev, std_codecs->codec[i].transport,
+ &caps);
+ }
+
+ skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num)
+ + sizeof(std_codecs->num));
+
+ vnd_codecs = (void *)skb->data;
+
+ /* check for payload data length before accessing */
+ if (skb->len <
+ flex_array_size(vnd_codecs, codec, vnd_codecs->num)
+ + sizeof(vnd_codecs->num))
+ goto error;
+
+ for (i = 0; i < vnd_codecs->num; i++) {
+ caps.id = 0xFF;
+ caps.cid = vnd_codecs->codec[i].cid;
+ caps.vid = vnd_codecs->codec[i].vid;
+ hci_read_codec_capabilities(hdev, vnd_codecs->codec[i].transport,
+ &caps);
+ }
+
+error:
+ kfree_skb(skb);
+}
diff --git a/net/bluetooth/hci_codec.h b/net/bluetooth/hci_codec.h
new file mode 100644
index 000000000000..a2751930f123
--- /dev/null
+++ b/net/bluetooth/hci_codec.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Copyright (C) 2014 Intel Corporation */
+
+void hci_read_supported_codecs(struct hci_dev *hdev);
+void hci_read_supported_codecs_v2(struct hci_dev *hdev);
+void hci_codec_list_clear(struct list_head *codec_list);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 2b5059a56cda..bd669c95b9a7 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -307,13 +307,133 @@ static bool find_next_esco_param(struct hci_conn *conn,
return conn->attempt <= size;
}
-bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
+static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
+{
+ struct hci_dev *hdev = conn->hdev;
+ struct hci_cp_enhanced_setup_sync_conn cp;
+ const struct sco_param *param;
+
+ bt_dev_dbg(hdev, "hcon %p", conn);
+
+ /* for offload use case, codec needs to configured before opening SCO */
+ if (conn->codec.data_path)
+ hci_req_configure_datapath(hdev, &conn->codec);
+
+ conn->state = BT_CONNECT;
+ conn->out = true;
+
+ conn->attempt++;
+
+ memset(&cp, 0x00, sizeof(cp));
+
+ cp.handle = cpu_to_le16(handle);
+
+ cp.tx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.rx_bandwidth = cpu_to_le32(0x00001f40);
+
+ switch (conn->codec.id) {
+ case BT_CODEC_MSBC:
+ if (!find_next_esco_param(conn, esco_param_msbc,
+ ARRAY_SIZE(esco_param_msbc)))
+ return false;
+
+ param = &esco_param_msbc[conn->attempt - 1];
+ cp.tx_coding_format.id = 0x05;
+ cp.rx_coding_format.id = 0x05;
+ cp.tx_codec_frame_size = __cpu_to_le16(60);
+ cp.rx_codec_frame_size = __cpu_to_le16(60);
+ cp.in_bandwidth = __cpu_to_le32(32000);
+ cp.out_bandwidth = __cpu_to_le32(32000);
+ cp.in_coding_format.id = 0x04;
+ cp.out_coding_format.id = 0x04;
+ cp.in_coded_data_size = __cpu_to_le16(16);
+ cp.out_coded_data_size = __cpu_to_le16(16);
+ cp.in_pcm_data_format = 2;
+ cp.out_pcm_data_format = 2;
+ cp.in_pcm_sample_payload_msb_pos = 0;
+ cp.out_pcm_sample_payload_msb_pos = 0;
+ cp.in_data_path = conn->codec.data_path;
+ cp.out_data_path = conn->codec.data_path;
+ cp.in_transport_unit_size = 1;
+ cp.out_transport_unit_size = 1;
+ break;
+
+ case BT_CODEC_TRANSPARENT:
+ if (!find_next_esco_param(conn, esco_param_msbc,
+ ARRAY_SIZE(esco_param_msbc)))
+ return false;
+ param = &esco_param_msbc[conn->attempt - 1];
+ cp.tx_coding_format.id = 0x03;
+ cp.rx_coding_format.id = 0x03;
+ cp.tx_codec_frame_size = __cpu_to_le16(60);
+ cp.rx_codec_frame_size = __cpu_to_le16(60);
+ cp.in_bandwidth = __cpu_to_le32(0x1f40);
+ cp.out_bandwidth = __cpu_to_le32(0x1f40);
+ cp.in_coding_format.id = 0x03;
+ cp.out_coding_format.id = 0x03;
+ cp.in_coded_data_size = __cpu_to_le16(16);
+ cp.out_coded_data_size = __cpu_to_le16(16);
+ cp.in_pcm_data_format = 2;
+ cp.out_pcm_data_format = 2;
+ cp.in_pcm_sample_payload_msb_pos = 0;
+ cp.out_pcm_sample_payload_msb_pos = 0;
+ cp.in_data_path = conn->codec.data_path;
+ cp.out_data_path = conn->codec.data_path;
+ cp.in_transport_unit_size = 1;
+ cp.out_transport_unit_size = 1;
+ break;
+
+ case BT_CODEC_CVSD:
+ if (lmp_esco_capable(conn->link)) {
+ if (!find_next_esco_param(conn, esco_param_cvsd,
+ ARRAY_SIZE(esco_param_cvsd)))
+ return false;
+ param = &esco_param_cvsd[conn->attempt - 1];
+ } else {
+ if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
+ return false;
+ param = &sco_param_cvsd[conn->attempt - 1];
+ }
+ cp.tx_coding_format.id = 2;
+ cp.rx_coding_format.id = 2;
+ cp.tx_codec_frame_size = __cpu_to_le16(60);
+ cp.rx_codec_frame_size = __cpu_to_le16(60);
+ cp.in_bandwidth = __cpu_to_le32(16000);
+ cp.out_bandwidth = __cpu_to_le32(16000);
+ cp.in_coding_format.id = 4;
+ cp.out_coding_format.id = 4;
+ cp.in_coded_data_size = __cpu_to_le16(16);
+ cp.out_coded_data_size = __cpu_to_le16(16);
+ cp.in_pcm_data_format = 2;
+ cp.out_pcm_data_format = 2;
+ cp.in_pcm_sample_payload_msb_pos = 0;
+ cp.out_pcm_sample_payload_msb_pos = 0;
+ cp.in_data_path = conn->codec.data_path;
+ cp.out_data_path = conn->codec.data_path;
+ cp.in_transport_unit_size = 16;
+ cp.out_transport_unit_size = 16;
+ break;
+ default:
+ return false;
+ }
+
+ cp.retrans_effort = param->retrans_effort;
+ cp.pkt_type = __cpu_to_le16(param->pkt_type);
+ cp.max_latency = __cpu_to_le16(param->max_latency);
+
+ if (hci_send_cmd(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0)
+ return false;
+
+ return true;
+}
+
+static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_setup_sync_conn cp;
const struct sco_param *param;
- BT_DBG("hcon %p", conn);
+ bt_dev_dbg(hdev, "hcon %p", conn);
conn->state = BT_CONNECT;
conn->out = true;
@@ -359,6 +479,14 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
return true;
}
+bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
+{
+ if (enhanced_sco_capable(conn->hdev))
+ return hci_enhanced_setup_sync_conn(conn, handle);
+
+ return hci_setup_sync_conn(conn, handle);
+}
+
u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
u16 to_multiplier)
{
@@ -1040,8 +1168,8 @@ static void hci_req_directed_advertising(struct hci_request *req,
}
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
- u8 dst_type, u8 sec_level, u16 conn_timeout,
- u8 role, bdaddr_t *direct_rpa)
+ u8 dst_type, bool dst_resolved, u8 sec_level,
+ u16 conn_timeout, u8 role, bdaddr_t *direct_rpa)
{
struct hci_conn_params *params;
struct hci_conn *conn;
@@ -1078,19 +1206,24 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EBUSY);
}
- /* When given an identity address with existing identity
- * resolving key, the connection needs to be established
- * to a resolvable random address.
- *
- * Storing the resolvable random address is required here
- * to handle connection failures. The address will later
- * be resolved back into the original identity address
- * from the connect request.
+ /* Check if the destination address has been resolved by the controller
+ * since if it did then the identity address shall be used.
*/
- irk = hci_find_irk_by_addr(hdev, dst, dst_type);
- if (irk && bacmp(&irk->rpa, BDADDR_ANY)) {
- dst = &irk->rpa;
- dst_type = ADDR_LE_DEV_RANDOM;
+ if (!dst_resolved) {
+ /* When given an identity address with existing identity
+ * resolving key, the connection needs to be established
+ * to a resolvable random address.
+ *
+ * Storing the resolvable random address is required here
+ * to handle connection failures. The address will later
+ * be resolved back into the original identity address
+ * from the connect request.
+ */
+ irk = hci_find_irk_by_addr(hdev, dst, dst_type);
+ if (irk && bacmp(&irk->rpa, BDADDR_ANY)) {
+ dst = &irk->rpa;
+ dst_type = ADDR_LE_DEV_RANDOM;
+ }
}
if (conn) {
@@ -1319,7 +1452,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
}
struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
- __u16 setting)
+ __u16 setting, struct bt_codec *codec)
{
struct hci_conn *acl;
struct hci_conn *sco;
@@ -1344,6 +1477,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
hci_conn_hold(sco);
sco->setting = setting;
+ sco->codec = *codec;
if (acl->state == BT_CONNECTED &&
(sco->state == BT_OPEN || sco->state == BT_CLOSED)) {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 8a47a3017d61..8d33aa64846b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -45,6 +45,7 @@
#include "leds.h"
#include "msft.h"
#include "aosp.h"
+#include "hci_codec.h"
static void hci_rx_work(struct work_struct *work);
static void hci_cmd_work(struct work_struct *work);
@@ -61,130 +62,6 @@ DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
-/* ---- HCI debugfs entries ---- */
-
-static ssize_t dut_mode_read(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- char buf[3];
-
- buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y' : 'N';
- buf[1] = '\n';
- buf[2] = '\0';
- return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-}
-
-static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- struct sk_buff *skb;
- bool enable;
- int err;
-
- if (!test_bit(HCI_UP, &hdev->flags))
- return -ENETDOWN;
-
- err = kstrtobool_from_user(user_buf, count, &enable);
- if (err)
- return err;
-
- if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE))
- return -EALREADY;
-
- hci_req_sync_lock(hdev);
- if (enable)
- skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL,
- HCI_CMD_TIMEOUT);
- else
- skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL,
- HCI_CMD_TIMEOUT);
- hci_req_sync_unlock(hdev);
-
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- kfree_skb(skb);
-
- hci_dev_change_flag(hdev, HCI_DUT_MODE);
-
- return count;
-}
-
-static const struct file_operations dut_mode_fops = {
- .open = simple_open,
- .read = dut_mode_read,
- .write = dut_mode_write,
- .llseek = default_llseek,
-};
-
-static ssize_t vendor_diag_read(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- char buf[3];
-
- buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y' : 'N';
- buf[1] = '\n';
- buf[2] = '\0';
- return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-}
-
-static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- bool enable;
- int err;
-
- err = kstrtobool_from_user(user_buf, count, &enable);
- if (err)
- return err;
-
- /* When the diagnostic flags are not persistent and the transport
- * is not active or in user channel operation, then there is no need
- * for the vendor callback. Instead just store the desired value and
- * the setting will be programmed when the controller gets powered on.
- */
- if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
- (!test_bit(HCI_RUNNING, &hdev->flags) ||
- hci_dev_test_flag(hdev, HCI_USER_CHANNEL)))
- goto done;
-
- hci_req_sync_lock(hdev);
- err = hdev->set_diag(hdev, enable);
- hci_req_sync_unlock(hdev);
-
- if (err < 0)
- return err;
-
-done:
- if (enable)
- hci_dev_set_flag(hdev, HCI_VENDOR_DIAG);
- else
- hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG);
-
- return count;
-}
-
-static const struct file_operations vendor_diag_fops = {
- .open = simple_open,
- .read = vendor_diag_read,
- .write = vendor_diag_write,
- .llseek = default_llseek,
-};
-
-static void hci_debugfs_create_basic(struct hci_dev *hdev)
-{
- debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev,
- &dut_mode_fops);
-
- if (hdev->set_diag)
- debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev,
- &vendor_diag_fops);
-}
-
static int hci_reset_req(struct hci_request *req, unsigned long opt)
{
BT_DBG("%s %ld", req->hdev->name, opt);
@@ -838,10 +715,6 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
if (hdev->commands[22] & 0x04)
hci_set_event_mask_page_2(req);
- /* Read local codec list if the HCI command is supported */
- if (hdev->commands[29] & 0x20)
- hci_req_add(req, HCI_OP_READ_LOCAL_CODECS, 0, NULL);
-
/* Read local pairing options if the HCI command is supported */
if (hdev->commands[41] & 0x08)
hci_req_add(req, HCI_OP_READ_LOCAL_PAIRING_OPTS, 0, NULL);
@@ -937,6 +810,12 @@ static int __hci_init(struct hci_dev *hdev)
if (err < 0)
return err;
+ /* Read local codec list if the HCI command is supported */
+ if (hdev->commands[45] & 0x04)
+ hci_read_supported_codecs_v2(hdev);
+ else if (hdev->commands[29] & 0x20)
+ hci_read_supported_codecs(hdev);
+
/* This function is only called when the controller is actually in
* configured state. When the controller is marked as unconfigured,
* this initialization procedure is not run.
@@ -1848,6 +1727,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
bacpy(&hdev->random_addr, BDADDR_ANY);
+ hci_codec_list_clear(&hdev->local_codecs);
hci_req_sync_unlock(hdev);
@@ -3081,6 +2961,60 @@ int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance,
}
/* This function requires the caller holds hdev->lock */
+u32 hci_adv_instance_flags(struct hci_dev *hdev, u8 instance)
+{
+ u32 flags;
+ struct adv_info *adv;
+
+ if (instance == 0x00) {
+ /* Instance 0 always manages the "Tx Power" and "Flags"
+ * fields
+ */
+ flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;
+
+ /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting
+ * corresponds to the "connectable" instance flag.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
+ flags |= MGMT_ADV_FLAG_CONNECTABLE;
+
+ if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
+ flags |= MGMT_ADV_FLAG_LIMITED_DISCOV;
+ else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
+ flags |= MGMT_ADV_FLAG_DISCOV;
+
+ return flags;
+ }
+
+ adv = hci_find_adv_instance(hdev, instance);
+
+ /* Return 0 when we got an invalid instance identifier. */
+ if (!adv)
+ return 0;
+
+ return adv->flags;
+}
+
+bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance)
+{
+ struct adv_info *adv;
+
+ /* Instance 0x00 always set local name */
+ if (instance == 0x00)
+ return true;
+
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv)
+ return false;
+
+ if (adv->flags & MGMT_ADV_FLAG_APPEARANCE ||
+ adv->flags & MGMT_ADV_FLAG_LOCAL_NAME)
+ return true;
+
+ return adv->scan_rsp_len ? true : false;
+}
+
+/* This function requires the caller holds hdev->lock */
void hci_adv_monitors_clear(struct hci_dev *hdev)
{
struct adv_monitor *monitor;
@@ -3487,15 +3421,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
{
struct hci_conn_params *param;
- switch (addr_type) {
- case ADDR_LE_DEV_PUBLIC_RESOLVED:
- addr_type = ADDR_LE_DEV_PUBLIC;
- break;
- case ADDR_LE_DEV_RANDOM_RESOLVED:
- addr_type = ADDR_LE_DEV_RANDOM;
- break;
- }
-
list_for_each_entry(param, list, action) {
if (bacmp(&param->addr, addr) == 0 &&
param->addr_type == addr_type)
@@ -3701,55 +3626,12 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
struct hci_dev *hdev =
container_of(nb, struct hci_dev, suspend_notifier);
int ret = 0;
- u8 state = BT_RUNNING;
- /* If powering down, wait for completion. */
- if (mgmt_powering_down(hdev)) {
- set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks);
- ret = hci_suspend_wait_event(hdev);
- if (ret)
- goto done;
- }
-
- /* Suspend notifier should only act on events when powered. */
- if (!hdev_is_powered(hdev) ||
- hci_dev_test_flag(hdev, HCI_UNREGISTER))
- goto done;
+ if (action == PM_SUSPEND_PREPARE)
+ ret = hci_suspend_dev(hdev);
+ else if (action == PM_POST_SUSPEND)
+ ret = hci_resume_dev(hdev);
- if (action == PM_SUSPEND_PREPARE) {
- /* Suspend consists of two actions:
- * - First, disconnect everything and make the controller not
- * connectable (disabling scanning)
- * - Second, program event filter/accept list and enable scan
- */
- ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT);
- if (!ret)
- state = BT_SUSPEND_DISCONNECT;
-
- /* Only configure accept list if disconnect succeeded and wake
- * isn't being prevented.
- */
- if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev))) {
- ret = hci_change_suspend_state(hdev,
- BT_SUSPEND_CONFIGURE_WAKE);
- if (!ret)
- state = BT_SUSPEND_CONFIGURE_WAKE;
- }
-
- hci_clear_wake_reason(hdev);
- mgmt_suspending(hdev, state);
-
- } else if (action == PM_POST_SUSPEND) {
- ret = hci_change_suspend_state(hdev, BT_RUNNING);
-
- mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr,
- hdev->wake_addr_type);
- }
-
-done:
- /* We always allow suspend even if suspend preparation failed and
- * attempt to recover in resume.
- */
if (ret)
bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d",
action, ret);
@@ -3857,6 +3739,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
INIT_LIST_HEAD(&hdev->adv_instances);
INIT_LIST_HEAD(&hdev->blocked_keys);
+ INIT_LIST_HEAD(&hdev->local_codecs);
INIT_WORK(&hdev->rx_work, hci_rx_work);
INIT_WORK(&hdev->cmd_work, hci_cmd_work);
INIT_WORK(&hdev->tx_work, hci_tx_work);
@@ -3994,6 +3877,7 @@ int hci_register_dev(struct hci_dev *hdev)
queue_work(hdev->req_workqueue, &hdev->power_on);
idr_init(&hdev->adv_monitors_idr);
+ msft_register(hdev);
return id;
@@ -4026,6 +3910,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
cancel_work_sync(&hdev->suspend_prepare);
}
+ msft_unregister(hdev);
+
hci_dev_do_close(hdev);
if (!test_bit(HCI_INIT, &hdev->flags) &&
@@ -4088,16 +3974,78 @@ EXPORT_SYMBOL(hci_release_dev);
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
{
+ int ret;
+ u8 state = BT_RUNNING;
+
+ bt_dev_dbg(hdev, "");
+
+ /* Suspend should only act on when powered. */
+ if (!hdev_is_powered(hdev) ||
+ hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return 0;
+
+ /* If powering down, wait for completion. */
+ if (mgmt_powering_down(hdev)) {
+ set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks);
+ ret = hci_suspend_wait_event(hdev);
+ if (ret)
+ goto done;
+ }
+
+ /* Suspend consists of two actions:
+ * - First, disconnect everything and make the controller not
+ * connectable (disabling scanning)
+ * - Second, program event filter/accept list and enable scan
+ */
+ ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT);
+ if (ret)
+ goto clear;
+
+ state = BT_SUSPEND_DISCONNECT;
+
+ /* Only configure accept list if device may wakeup. */
+ if (hdev->wakeup && hdev->wakeup(hdev)) {
+ ret = hci_change_suspend_state(hdev, BT_SUSPEND_CONFIGURE_WAKE);
+ if (!ret)
+ state = BT_SUSPEND_CONFIGURE_WAKE;
+ }
+
+clear:
+ hci_clear_wake_reason(hdev);
+ mgmt_suspending(hdev, state);
+
+done:
+ /* We always allow suspend even if suspend preparation failed and
+ * attempt to recover in resume.
+ */
hci_sock_dev_event(hdev, HCI_DEV_SUSPEND);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(hci_suspend_dev);
/* Resume HCI device */
int hci_resume_dev(struct hci_dev *hdev)
{
+ int ret;
+
+ bt_dev_dbg(hdev, "");
+
+ /* Resume should only act on when powered. */
+ if (!hdev_is_powered(hdev) ||
+ hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return 0;
+
+ /* If powering down don't attempt to resume */
+ if (mgmt_powering_down(hdev))
+ return 0;
+
+ ret = hci_change_suspend_state(hdev, BT_RUNNING);
+
+ mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr,
+ hdev->wake_addr_type);
+
hci_sock_dev_event(hdev, HCI_DEV_RESUME);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(hci_resume_dev);
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 841393389f7b..902b40a90b91 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -27,6 +27,7 @@
#include <net/bluetooth/hci_core.h>
#include "smp.h"
+#include "hci_request.h"
#include "hci_debugfs.h"
#define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \
@@ -1250,3 +1251,125 @@ void hci_debugfs_create_conn(struct hci_conn *conn)
snprintf(name, sizeof(name), "%u", conn->handle);
conn->debugfs = debugfs_create_dir(name, hdev->debugfs);
}
+
+static ssize_t dut_mode_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[3];
+
+ buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y' : 'N';
+ buf[1] = '\n';
+ buf[2] = '\0';
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ struct sk_buff *skb;
+ bool enable;
+ int err;
+
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return -ENETDOWN;
+
+ err = kstrtobool_from_user(user_buf, count, &enable);
+ if (err)
+ return err;
+
+ if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE))
+ return -EALREADY;
+
+ hci_req_sync_lock(hdev);
+ if (enable)
+ skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL,
+ HCI_CMD_TIMEOUT);
+ else
+ skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL,
+ HCI_CMD_TIMEOUT);
+ hci_req_sync_unlock(hdev);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ kfree_skb(skb);
+
+ hci_dev_change_flag(hdev, HCI_DUT_MODE);
+
+ return count;
+}
+
+static const struct file_operations dut_mode_fops = {
+ .open = simple_open,
+ .read = dut_mode_read,
+ .write = dut_mode_write,
+ .llseek = default_llseek,
+};
+
+static ssize_t vendor_diag_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[3];
+
+ buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y' : 'N';
+ buf[1] = '\n';
+ buf[2] = '\0';
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ bool enable;
+ int err;
+
+ err = kstrtobool_from_user(user_buf, count, &enable);
+ if (err)
+ return err;
+
+ /* When the diagnostic flags are not persistent and the transport
+ * is not active or in user channel operation, then there is no need
+ * for the vendor callback. Instead just store the desired value and
+ * the setting will be programmed when the controller gets powered on.
+ */
+ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+ (!test_bit(HCI_RUNNING, &hdev->flags) ||
+ hci_dev_test_flag(hdev, HCI_USER_CHANNEL)))
+ goto done;
+
+ hci_req_sync_lock(hdev);
+ err = hdev->set_diag(hdev, enable);
+ hci_req_sync_unlock(hdev);
+
+ if (err < 0)
+ return err;
+
+done:
+ if (enable)
+ hci_dev_set_flag(hdev, HCI_VENDOR_DIAG);
+ else
+ hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG);
+
+ return count;
+}
+
+static const struct file_operations vendor_diag_fops = {
+ .open = simple_open,
+ .read = vendor_diag_read,
+ .write = vendor_diag_write,
+ .llseek = default_llseek,
+};
+
+void hci_debugfs_create_basic(struct hci_dev *hdev)
+{
+ debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev,
+ &dut_mode_fops);
+
+ if (hdev->set_diag)
+ debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev,
+ &vendor_diag_fops);
+}
diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h
index 4444dc8cedc2..9a8a7c93bb12 100644
--- a/net/bluetooth/hci_debugfs.h
+++ b/net/bluetooth/hci_debugfs.h
@@ -26,6 +26,7 @@ void hci_debugfs_create_common(struct hci_dev *hdev);
void hci_debugfs_create_bredr(struct hci_dev *hdev);
void hci_debugfs_create_le(struct hci_dev *hdev);
void hci_debugfs_create_conn(struct hci_conn *conn);
+void hci_debugfs_create_basic(struct hci_dev *hdev);
#else
@@ -45,4 +46,8 @@ static inline void hci_debugfs_create_conn(struct hci_conn *conn)
{
}
+static inline void hci_debugfs_create_basic(struct hci_dev *hdev)
+{
+}
+
#endif
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0bca035bf2dc..7d0db1ca1248 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -36,6 +36,7 @@
#include "amp.h"
#include "smp.h"
#include "msft.h"
+#include "eir.h"
#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \
"\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -2278,6 +2279,41 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status)
hci_dev_unlock(hdev);
}
+static void hci_cs_enhanced_setup_sync_conn(struct hci_dev *hdev, __u8 status)
+{
+ struct hci_cp_enhanced_setup_sync_conn *cp;
+ struct hci_conn *acl, *sco;
+ __u16 handle;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+ if (!status)
+ return;
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN);
+ if (!cp)
+ return;
+
+ handle = __le16_to_cpu(cp->handle);
+
+ bt_dev_dbg(hdev, "handle 0x%4.4x", handle);
+
+ hci_dev_lock(hdev);
+
+ acl = hci_conn_hash_lookup_handle(hdev, handle);
+ if (acl) {
+ sco = acl->link;
+ if (sco) {
+ sco->state = BT_CLOSED;
+
+ hci_connect_cfm(sco, status);
+ hci_conn_del(sco);
+ }
+ }
+
+ hci_dev_unlock(hdev);
+}
+
static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status)
{
struct hci_cp_sniff_mode *cp;
@@ -2351,7 +2387,7 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
mgmt_disconnect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, status);
- if (conn->type == LE_LINK) {
+ if (conn->type == LE_LINK && conn->role == HCI_ROLE_SLAVE) {
hdev->cur_adv_instance = conn->adv_instance;
hci_req_reenable_advertising(hdev);
}
@@ -2367,6 +2403,28 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
hci_dev_unlock(hdev);
}
+static u8 ev_bdaddr_type(struct hci_dev *hdev, u8 type, bool *resolved)
+{
+ /* When using controller based address resolution, then the new
+ * address types 0x02 and 0x03 are used. These types need to be
+ * converted back into either public address or random address type
+ */
+ switch (type) {
+ case ADDR_LE_DEV_PUBLIC_RESOLVED:
+ if (resolved)
+ *resolved = true;
+ return ADDR_LE_DEV_PUBLIC;
+ case ADDR_LE_DEV_RANDOM_RESOLVED:
+ if (resolved)
+ *resolved = true;
+ return ADDR_LE_DEV_RANDOM;
+ }
+
+ if (resolved)
+ *resolved = false;
+ return type;
+}
+
static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr,
u8 peer_addr_type, u8 own_address_type,
u8 filter_policy)
@@ -2378,21 +2436,7 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr,
if (!conn)
return;
- /* When using controller based address resolution, then the new
- * address types 0x02 and 0x03 are used. These types need to be
- * converted back into either public address or random address type
- */
- if (use_ll_privacy(hdev) &&
- hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) {
- switch (own_address_type) {
- case ADDR_LE_DEV_PUBLIC_RESOLVED:
- own_address_type = ADDR_LE_DEV_PUBLIC;
- break;
- case ADDR_LE_DEV_RANDOM_RESOLVED:
- own_address_type = ADDR_LE_DEV_RANDOM;
- break;
- }
- }
+ own_address_type = ev_bdaddr_type(hdev, own_address_type, NULL);
/* Store the initiator and responder address information which
* is needed for SMP. These values will not change during the
@@ -2961,7 +3005,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
* or until a connection is created or until the Advertising
* is timed out due to Directed Advertising."
*/
- if (conn->type == LE_LINK) {
+ if (conn->type == LE_LINK && conn->role == HCI_ROLE_SLAVE) {
hdev->cur_adv_instance = conn->adv_instance;
hci_req_reenable_advertising(hdev);
}
@@ -3756,6 +3800,10 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_cs_setup_sync_conn(hdev, ev->status);
break;
+ case HCI_OP_ENHANCED_SETUP_SYNC_CONN:
+ hci_cs_enhanced_setup_sync_conn(hdev, ev->status);
+ break;
+
case HCI_OP_SNIFF_MODE:
hci_cs_sniff_mode(hdev, ev->status);
break;
@@ -4397,6 +4445,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
{
struct hci_ev_sync_conn_complete *ev = (void *) skb->data;
struct hci_conn *conn;
+ unsigned int notify_evt;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
@@ -4471,15 +4520,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
switch (ev->air_mode) {
case 0x02:
- if (hdev->notify)
- hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_CVSD);
+ notify_evt = HCI_NOTIFY_ENABLE_SCO_CVSD;
break;
case 0x03:
- if (hdev->notify)
- hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_TRANSP);
+ notify_evt = HCI_NOTIFY_ENABLE_SCO_TRANSP;
break;
}
+ /* Notify only in case of SCO over HCI transport data path which
+ * is zero and non-zero value shall be non-HCI transport data path
+ */
+ if (conn->codec.data_path == 0) {
+ if (hdev->notify)
+ hdev->notify(hdev, notify_evt);
+ }
+
hci_connect_cfm(conn, ev->status);
if (ev->status)
hci_conn_del(conn);
@@ -5282,22 +5337,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = irk->addr_type;
}
- /* When using controller based address resolution, then the new
- * address types 0x02 and 0x03 are used. These types need to be
- * converted back into either public address or random address type
- */
- if (use_ll_privacy(hdev) &&
- hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
- hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) {
- switch (conn->dst_type) {
- case ADDR_LE_DEV_PUBLIC_RESOLVED:
- conn->dst_type = ADDR_LE_DEV_PUBLIC;
- break;
- case ADDR_LE_DEV_RANDOM_RESOLVED:
- conn->dst_type = ADDR_LE_DEV_RANDOM;
- break;
- }
- }
+ conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL);
if (status) {
hci_le_conn_failed(conn, status);
@@ -5479,8 +5519,8 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev,
/* This function requires the caller holds hdev->lock */
static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
bdaddr_t *addr,
- u8 addr_type, u8 adv_type,
- bdaddr_t *direct_rpa)
+ u8 addr_type, bool addr_resolved,
+ u8 adv_type, bdaddr_t *direct_rpa)
{
struct hci_conn *conn;
struct hci_conn_params *params;
@@ -5532,9 +5572,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
}
}
- conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
- hdev->def_le_autoconnect_timeout, HCI_ROLE_MASTER,
- direct_rpa);
+ conn = hci_connect_le(hdev, addr, addr_type, addr_resolved,
+ BT_SECURITY_LOW, hdev->def_le_autoconnect_timeout,
+ HCI_ROLE_MASTER, direct_rpa);
if (!IS_ERR(conn)) {
/* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned
* by higher layer that tried to connect, if no then
@@ -5575,7 +5615,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
struct discovery_state *d = &hdev->discovery;
struct smp_irk *irk;
struct hci_conn *conn;
- bool match;
+ bool match, bdaddr_resolved;
u32 flags;
u8 *ptr;
@@ -5619,6 +5659,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
* controller address.
*/
if (direct_addr) {
+ direct_addr_type = ev_bdaddr_type(hdev, direct_addr_type,
+ &bdaddr_resolved);
+
/* Only resolvable random addresses are valid for these
* kind of reports and others can be ignored.
*/
@@ -5646,13 +5689,15 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
bdaddr_type = irk->addr_type;
}
+ bdaddr_type = ev_bdaddr_type(hdev, bdaddr_type, &bdaddr_resolved);
+
/* Check if we have been requested to connect to this device.
*
* direct_addr is set only for directed advertising reports (it is NULL
* for advertising reports) and is already verified to be RPA above.
*/
- conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
- direct_addr);
+ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved,
+ type, direct_addr);
if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) {
/* Store report for later inclusion by
* mgmt_device_connected
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index f15626607b2d..92611bfc0b9e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -30,6 +30,7 @@
#include "smp.h"
#include "hci_request.h"
#include "msft.h"
+#include "eir.h"
#define HCI_REQ_DONE 0
#define HCI_REQ_PEND 1
@@ -521,164 +522,6 @@ void __hci_req_update_name(struct hci_request *req)
hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
}
-#define PNP_INFO_SVCLASS_ID 0x1200
-
-static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
-{
- u8 *ptr = data, *uuids_start = NULL;
- struct bt_uuid *uuid;
-
- if (len < 4)
- return ptr;
-
- list_for_each_entry(uuid, &hdev->uuids, list) {
- u16 uuid16;
-
- if (uuid->size != 16)
- continue;
-
- uuid16 = get_unaligned_le16(&uuid->uuid[12]);
- if (uuid16 < 0x1100)
- continue;
-
- if (uuid16 == PNP_INFO_SVCLASS_ID)
- continue;
-
- if (!uuids_start) {
- uuids_start = ptr;
- uuids_start[0] = 1;
- uuids_start[1] = EIR_UUID16_ALL;
- ptr += 2;
- }
-
- /* Stop if not enough space to put next UUID */
- if ((ptr - data) + sizeof(u16) > len) {
- uuids_start[1] = EIR_UUID16_SOME;
- break;
- }
-
- *ptr++ = (uuid16 & 0x00ff);
- *ptr++ = (uuid16 & 0xff00) >> 8;
- uuids_start[0] += sizeof(uuid16);
- }
-
- return ptr;
-}
-
-static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
-{
- u8 *ptr = data, *uuids_start = NULL;
- struct bt_uuid *uuid;
-
- if (len < 6)
- return ptr;
-
- list_for_each_entry(uuid, &hdev->uuids, list) {
- if (uuid->size != 32)
- continue;
-
- if (!uuids_start) {
- uuids_start = ptr;
- uuids_start[0] = 1;
- uuids_start[1] = EIR_UUID32_ALL;
- ptr += 2;
- }
-
- /* Stop if not enough space to put next UUID */
- if ((ptr - data) + sizeof(u32) > len) {
- uuids_start[1] = EIR_UUID32_SOME;
- break;
- }
-
- memcpy(ptr, &uuid->uuid[12], sizeof(u32));
- ptr += sizeof(u32);
- uuids_start[0] += sizeof(u32);
- }
-
- return ptr;
-}
-
-static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
-{
- u8 *ptr = data, *uuids_start = NULL;
- struct bt_uuid *uuid;
-
- if (len < 18)
- return ptr;
-
- list_for_each_entry(uuid, &hdev->uuids, list) {
- if (uuid->size != 128)
- continue;
-
- if (!uuids_start) {
- uuids_start = ptr;
- uuids_start[0] = 1;
- uuids_start[1] = EIR_UUID128_ALL;
- ptr += 2;
- }
-
- /* Stop if not enough space to put next UUID */
- if ((ptr - data) + 16 > len) {
- uuids_start[1] = EIR_UUID128_SOME;
- break;
- }
-
- memcpy(ptr, uuid->uuid, 16);
- ptr += 16;
- uuids_start[0] += 16;
- }
-
- return ptr;
-}
-
-static void create_eir(struct hci_dev *hdev, u8 *data)
-{
- u8 *ptr = data;
- size_t name_len;
-
- name_len = strlen(hdev->dev_name);
-
- if (name_len > 0) {
- /* EIR Data type */
- if (name_len > 48) {
- name_len = 48;
- ptr[1] = EIR_NAME_SHORT;
- } else
- ptr[1] = EIR_NAME_COMPLETE;
-
- /* EIR Data length */
- ptr[0] = name_len + 1;
-
- memcpy(ptr + 2, hdev->dev_name, name_len);
-
- ptr += (name_len + 2);
- }
-
- if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) {
- ptr[0] = 2;
- ptr[1] = EIR_TX_POWER;
- ptr[2] = (u8) hdev->inq_tx_power;
-
- ptr += 3;
- }
-
- if (hdev->devid_source > 0) {
- ptr[0] = 9;
- ptr[1] = EIR_DEVICE_ID;
-
- put_unaligned_le16(hdev->devid_source, ptr + 2);
- put_unaligned_le16(hdev->devid_vendor, ptr + 4);
- put_unaligned_le16(hdev->devid_product, ptr + 6);
- put_unaligned_le16(hdev->devid_version, ptr + 8);
-
- ptr += 10;
- }
-
- ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
- ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
- ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
-}
-
void __hci_req_update_eir(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
@@ -698,7 +541,7 @@ void __hci_req_update_eir(struct hci_request *req)
memset(&cp, 0, sizeof(cp));
- create_eir(hdev, cp.data);
+ eir_create(hdev, cp.data);
if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
return;
@@ -1134,25 +977,6 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
addr_resolv);
}
-static bool adv_instance_is_scannable(struct hci_dev *hdev, u8 instance)
-{
- struct adv_info *adv_instance;
-
- /* Instance 0x00 always set local name */
- if (instance == 0x00)
- return true;
-
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return false;
-
- if (adv_instance->flags & MGMT_ADV_FLAG_APPEARANCE ||
- adv_instance->flags & MGMT_ADV_FLAG_LOCAL_NAME)
- return true;
-
- return adv_instance->scan_rsp_len ? true : false;
-}
-
static void hci_req_clear_event_filter(struct hci_request *req)
{
struct hci_cp_set_event_filter f;
@@ -1281,21 +1105,24 @@ static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode)
}
}
-static void hci_req_add_set_adv_filter_enable(struct hci_request *req,
- bool enable)
+static void hci_req_prepare_adv_monitor_suspend(struct hci_request *req,
+ bool suspending)
{
struct hci_dev *hdev = req->hdev;
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_MSFT:
- msft_req_add_set_filter_enable(req, enable);
+ if (suspending)
+ msft_suspend(hdev);
+ else
+ msft_resume(hdev);
break;
default:
return;
}
/* No need to block when enabling since it's on resume path */
- if (hdev->suspended && !enable)
+ if (hdev->suspended && suspending)
set_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks);
}
@@ -1362,7 +1189,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
}
/* Disable advertisement filters */
- hci_req_add_set_adv_filter_enable(&req, false);
+ hci_req_prepare_adv_monitor_suspend(&req, true);
/* Prevent disconnects from causing scanning to be re-enabled */
hdev->scanning_paused = true;
@@ -1404,7 +1231,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
/* Reset passive/background scanning to normal */
__hci_update_background_scan(&req);
/* Enable all of the advertisement filters */
- hci_req_add_set_adv_filter_enable(&req, true);
+ hci_req_prepare_adv_monitor_suspend(&req, false);
/* Unpause directed advertising */
hdev->advertising_paused = false;
@@ -1442,7 +1269,7 @@ done:
static bool adv_cur_instance_is_scannable(struct hci_dev *hdev)
{
- return adv_instance_is_scannable(hdev, hdev->cur_adv_instance);
+ return hci_adv_instance_is_scannable(hdev, hdev->cur_adv_instance);
}
void __hci_req_disable_advertising(struct hci_request *req)
@@ -1457,40 +1284,6 @@ void __hci_req_disable_advertising(struct hci_request *req)
}
}
-static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
-{
- u32 flags;
- struct adv_info *adv_instance;
-
- if (instance == 0x00) {
- /* Instance 0 always manages the "Tx Power" and "Flags"
- * fields
- */
- flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;
-
- /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting
- * corresponds to the "connectable" instance flag.
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
- flags |= MGMT_ADV_FLAG_CONNECTABLE;
-
- if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
- flags |= MGMT_ADV_FLAG_LIMITED_DISCOV;
- else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
- flags |= MGMT_ADV_FLAG_DISCOV;
-
- return flags;
- }
-
- adv_instance = hci_find_adv_instance(hdev, instance);
-
- /* Return 0 when we got an invalid instance identifier. */
- if (!adv_instance)
- return 0;
-
- return adv_instance->flags;
-}
-
static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags)
{
/* If privacy is not enabled don't use RPA */
@@ -1555,15 +1348,15 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
void __hci_req_enable_advertising(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
- struct adv_info *adv_instance;
+ struct adv_info *adv;
struct hci_cp_le_set_adv_param cp;
u8 own_addr_type, enable = 0x01;
bool connectable;
u16 adv_min_interval, adv_max_interval;
u32 flags;
- flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
- adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance);
+ flags = hci_adv_instance_flags(hdev, hdev->cur_adv_instance);
+ adv = hci_find_adv_instance(hdev, hdev->cur_adv_instance);
/* If the "connectable" instance flag was not set, then choose between
* ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
@@ -1595,9 +1388,9 @@ void __hci_req_enable_advertising(struct hci_request *req)
memset(&cp, 0, sizeof(cp));
- if (adv_instance) {
- adv_min_interval = adv_instance->min_interval;
- adv_max_interval = adv_instance->max_interval;
+ if (adv) {
+ adv_min_interval = adv->min_interval;
+ adv_max_interval = adv->max_interval;
} else {
adv_min_interval = hdev->le_adv_min_interval;
adv_max_interval = hdev->le_adv_max_interval;
@@ -1628,85 +1421,6 @@ void __hci_req_enable_advertising(struct hci_request *req)
hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
}
-u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
-{
- size_t short_len;
- size_t complete_len;
-
- /* no space left for name (+ NULL + type + len) */
- if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
- return ad_len;
-
- /* use complete name if present and fits */
- complete_len = strlen(hdev->dev_name);
- if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH)
- return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE,
- hdev->dev_name, complete_len + 1);
-
- /* use short name if present */
- short_len = strlen(hdev->short_name);
- if (short_len)
- return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
- hdev->short_name, short_len + 1);
-
- /* use shortened full name if present, we already know that name
- * is longer then HCI_MAX_SHORT_NAME_LENGTH
- */
- if (complete_len) {
- u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1];
-
- memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH);
- name[HCI_MAX_SHORT_NAME_LENGTH] = '\0';
-
- return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name,
- sizeof(name));
- }
-
- return ad_len;
-}
-
-static u8 append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
-{
- return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance);
-}
-
-static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
-{
- u8 scan_rsp_len = 0;
-
- if (hdev->appearance)
- scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len);
-
- return append_local_name(hdev, ptr, scan_rsp_len);
-}
-
-static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
- u8 *ptr)
-{
- struct adv_info *adv_instance;
- u32 instance_flags;
- u8 scan_rsp_len = 0;
-
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return 0;
-
- instance_flags = adv_instance->flags;
-
- if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance)
- scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len);
-
- memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data,
- adv_instance->scan_rsp_len);
-
- scan_rsp_len += adv_instance->scan_rsp_len;
-
- if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME)
- scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len);
-
- return scan_rsp_len;
-}
-
void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
{
struct hci_dev *hdev = req->hdev;
@@ -1723,11 +1437,7 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
memset(&pdu, 0, sizeof(pdu));
- if (instance)
- len = create_instance_scan_rsp_data(hdev, instance,
- pdu.data);
- else
- len = create_default_scan_rsp_data(hdev, pdu.data);
+ len = eir_create_scan_rsp(hdev, instance, pdu.data);
if (hdev->scan_rsp_data_len == len &&
!memcmp(pdu.data, hdev->scan_rsp_data, len))
@@ -1748,11 +1458,7 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
memset(&cp, 0, sizeof(cp));
- if (instance)
- len = create_instance_scan_rsp_data(hdev, instance,
- cp.data);
- else
- len = create_default_scan_rsp_data(hdev, cp.data);
+ len = eir_create_scan_rsp(hdev, instance, cp.data);
if (hdev->scan_rsp_data_len == len &&
!memcmp(cp.data, hdev->scan_rsp_data, len))
@@ -1767,95 +1473,6 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
}
}
-static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
-{
- struct adv_info *adv_instance = NULL;
- u8 ad_len = 0, flags = 0;
- u32 instance_flags;
-
- /* Return 0 when the current instance identifier is invalid. */
- if (instance) {
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return 0;
- }
-
- instance_flags = get_adv_instance_flags(hdev, instance);
-
- /* If instance already has the flags set skip adding it once
- * again.
- */
- if (adv_instance && eir_get_data(adv_instance->adv_data,
- adv_instance->adv_data_len, EIR_FLAGS,
- NULL))
- goto skip_flags;
-
- /* The Add Advertising command allows userspace to set both the general
- * and limited discoverable flags.
- */
- if (instance_flags & MGMT_ADV_FLAG_DISCOV)
- flags |= LE_AD_GENERAL;
-
- if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV)
- flags |= LE_AD_LIMITED;
-
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
- flags |= LE_AD_NO_BREDR;
-
- if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) {
- /* If a discovery flag wasn't provided, simply use the global
- * settings.
- */
- if (!flags)
- flags |= mgmt_get_adv_discov_flags(hdev);
-
- /* If flags would still be empty, then there is no need to
- * include the "Flags" AD field".
- */
- if (flags) {
- ptr[0] = 0x02;
- ptr[1] = EIR_FLAGS;
- ptr[2] = flags;
-
- ad_len += 3;
- ptr += 3;
- }
- }
-
-skip_flags:
- if (adv_instance) {
- memcpy(ptr, adv_instance->adv_data,
- adv_instance->adv_data_len);
- ad_len += adv_instance->adv_data_len;
- ptr += adv_instance->adv_data_len;
- }
-
- if (instance_flags & MGMT_ADV_FLAG_TX_POWER) {
- s8 adv_tx_power;
-
- if (ext_adv_capable(hdev)) {
- if (adv_instance)
- adv_tx_power = adv_instance->tx_power;
- else
- adv_tx_power = hdev->adv_tx_power;
- } else {
- adv_tx_power = hdev->adv_tx_power;
- }
-
- /* Provide Tx Power only if we can provide a valid value for it */
- if (adv_tx_power != HCI_TX_POWER_INVALID) {
- ptr[0] = 0x02;
- ptr[1] = EIR_TX_POWER;
- ptr[2] = (u8)adv_tx_power;
-
- ad_len += 3;
- ptr += 3;
- }
- }
-
- return ad_len;
-}
-
void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
{
struct hci_dev *hdev = req->hdev;
@@ -1872,7 +1489,7 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
memset(&pdu, 0, sizeof(pdu));
- len = create_instance_adv_data(hdev, instance, pdu.data);
+ len = eir_create_adv_data(hdev, instance, pdu.data);
/* There's nothing to do if the data hasn't changed */
if (hdev->adv_data_len == len &&
@@ -1894,7 +1511,7 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
memset(&cp, 0, sizeof(cp));
- len = create_instance_adv_data(hdev, instance, cp.data);
+ len = eir_create_adv_data(hdev, instance, cp.data);
/* There's nothing to do if the data hasn't changed */
if (hdev->adv_data_len == len &&
@@ -2183,7 +1800,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
adv_instance = NULL;
}
- flags = get_adv_instance_flags(hdev, instance);
+ flags = hci_adv_instance_flags(hdev, instance);
/* If the "connectable" instance flag was not set, then choose between
* ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
@@ -2223,7 +1840,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND);
else
cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND);
- } else if (adv_instance_is_scannable(hdev, instance) ||
+ } else if (hci_adv_instance_is_scannable(hdev, instance) ||
(flags & MGMT_ADV_PARAM_SCAN_RSP)) {
if (secondary_adv)
cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND);
@@ -3327,6 +2944,53 @@ bool hci_req_stop_discovery(struct hci_request *req)
return ret;
}
+static void config_data_path_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode)
+{
+ bt_dev_dbg(hdev, "status %u", status);
+}
+
+int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec)
+{
+ struct hci_request req;
+ int err;
+ __u8 vnd_len, *vnd_data = NULL;
+ struct hci_op_configure_data_path *cmd = NULL;
+
+ hci_req_init(&req, hdev);
+
+ err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
+ &vnd_data);
+ if (err < 0)
+ goto error;
+
+ cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL);
+ if (!cmd) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ err = hdev->get_data_path_id(hdev, &cmd->data_path_id);
+ if (err < 0)
+ goto error;
+
+ cmd->vnd_len = vnd_len;
+ memcpy(cmd->vnd_data, vnd_data, vnd_len);
+
+ cmd->direction = 0x00;
+ hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd);
+
+ cmd->direction = 0x01;
+ hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd);
+
+ err = hci_req_run(&req, config_data_path_complete);
+error:
+
+ kfree(cmd);
+ kfree(vnd_data);
+ return err;
+}
+
static int stop_discovery(struct hci_request *req, unsigned long opt)
{
hci_dev_lock(req->hdev);
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 39ee8a18087a..f31420f58525 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -101,6 +101,8 @@ void __hci_req_update_class(struct hci_request *req);
/* Returns true if HCI commands were queued */
bool hci_req_stop_discovery(struct hci_request *req);
+int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec);
+
static inline void hci_req_update_scan(struct hci_dev *hdev)
{
queue_work(hdev->req_workqueue, &hdev->scan_update);
@@ -122,26 +124,3 @@ static inline void hci_update_background_scan(struct hci_dev *hdev)
void hci_request_setup(struct hci_dev *hdev);
void hci_request_cancel_all(struct hci_dev *hdev);
-
-u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
-
-static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
- u8 *data, u8 data_len)
-{
- eir[eir_len++] = sizeof(type) + data_len;
- eir[eir_len++] = type;
- memcpy(&eir[eir_len], data, data_len);
- eir_len += data_len;
-
- return eir_len;
-}
-
-static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
-{
- eir[eir_len++] = sizeof(type) + sizeof(data);
- eir[eir_len++] = type;
- put_unaligned_le16(data, &eir[eir_len]);
- eir_len += sizeof(data);
-
- return eir_len;
-}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f1128c2134f0..d0dad1fafe07 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -57,6 +57,7 @@ struct hci_pinfo {
unsigned long flags;
__u32 cookie;
char comm[TASK_COMM_LEN];
+ __u16 mtu;
};
static struct hci_dev *hci_hdev_from_sock(struct sock *sk)
@@ -1374,6 +1375,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
break;
}
+ /* Default MTU to HCI_MAX_FRAME_SIZE if not set */
+ if (!hci_pi(sk)->mtu)
+ hci_pi(sk)->mtu = HCI_MAX_FRAME_SIZE;
+
sk->sk_state = BT_BOUND;
done:
@@ -1506,9 +1511,8 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
}
static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk,
- struct msghdr *msg, size_t msglen)
+ struct sk_buff *skb)
{
- void *buf;
u8 *cp;
struct mgmt_hdr *hdr;
u16 opcode, index, len;
@@ -1517,40 +1521,31 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk,
bool var_len, no_hdev;
int err;
- BT_DBG("got %zu bytes", msglen);
+ BT_DBG("got %d bytes", skb->len);
- if (msglen < sizeof(*hdr))
+ if (skb->len < sizeof(*hdr))
return -EINVAL;
- buf = kmalloc(msglen, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- if (memcpy_from_msg(buf, msg, msglen)) {
- err = -EFAULT;
- goto done;
- }
-
- hdr = buf;
+ hdr = (void *)skb->data;
opcode = __le16_to_cpu(hdr->opcode);
index = __le16_to_cpu(hdr->index);
len = __le16_to_cpu(hdr->len);
- if (len != msglen - sizeof(*hdr)) {
+ if (len != skb->len - sizeof(*hdr)) {
err = -EINVAL;
goto done;
}
if (chan->channel == HCI_CHANNEL_CONTROL) {
- struct sk_buff *skb;
+ struct sk_buff *cmd;
/* Send event to monitor */
- skb = create_monitor_ctrl_command(sk, index, opcode, len,
- buf + sizeof(*hdr));
- if (skb) {
- hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+ cmd = create_monitor_ctrl_command(sk, index, opcode, len,
+ skb->data + sizeof(*hdr));
+ if (cmd) {
+ hci_send_to_channel(HCI_CHANNEL_MONITOR, cmd,
HCI_SOCK_TRUSTED, NULL);
- kfree_skb(skb);
+ kfree_skb(cmd);
}
}
@@ -1615,26 +1610,25 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk,
if (hdev && chan->hdev_init)
chan->hdev_init(sk, hdev);
- cp = buf + sizeof(*hdr);
+ cp = skb->data + sizeof(*hdr);
err = handler->func(sk, hdev, cp, len);
if (err < 0)
goto done;
- err = msglen;
+ err = skb->len;
done:
if (hdev)
hci_dev_put(hdev);
- kfree(buf);
return err;
}
-static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len)
+static int hci_logging_frame(struct sock *sk, struct sk_buff *skb,
+ unsigned int flags)
{
struct hci_mon_hdr *hdr;
- struct sk_buff *skb;
struct hci_dev *hdev;
u16 index;
int err;
@@ -1643,24 +1637,13 @@ static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len)
* the priority byte, the ident length byte and at least one string
* terminator NUL byte. Anything shorter are invalid packets.
*/
- if (len < sizeof(*hdr) + 3)
+ if (skb->len < sizeof(*hdr) + 3)
return -EINVAL;
- skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
- if (!skb)
- return err;
-
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- err = -EFAULT;
- goto drop;
- }
-
hdr = (void *)skb->data;
- if (__le16_to_cpu(hdr->len) != len - sizeof(*hdr)) {
- err = -EINVAL;
- goto drop;
- }
+ if (__le16_to_cpu(hdr->len) != skb->len - sizeof(*hdr))
+ return -EINVAL;
if (__le16_to_cpu(hdr->opcode) == 0x0000) {
__u8 priority = skb->data[sizeof(*hdr)];
@@ -1679,25 +1662,20 @@ static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len)
* The message follows the ident string (if present) and
* must be NUL terminated. Otherwise it is not a valid packet.
*/
- if (priority > 7 || skb->data[len - 1] != 0x00 ||
- ident_len > len - sizeof(*hdr) - 3 ||
- skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) {
- err = -EINVAL;
- goto drop;
- }
+ if (priority > 7 || skb->data[skb->len - 1] != 0x00 ||
+ ident_len > skb->len - sizeof(*hdr) - 3 ||
+ skb->data[sizeof(*hdr) + ident_len + 1] != 0x00)
+ return -EINVAL;
} else {
- err = -EINVAL;
- goto drop;
+ return -EINVAL;
}
index = __le16_to_cpu(hdr->index);
if (index != MGMT_INDEX_NONE) {
hdev = hci_dev_get(index);
- if (!hdev) {
- err = -ENODEV;
- goto drop;
- }
+ if (!hdev)
+ return -ENODEV;
} else {
hdev = NULL;
}
@@ -1705,13 +1683,11 @@ static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len)
hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING);
hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL);
- err = len;
+ err = skb->len;
if (hdev)
hci_dev_put(hdev);
-drop:
- kfree_skb(skb);
return err;
}
@@ -1723,19 +1699,23 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
struct hci_dev *hdev;
struct sk_buff *skb;
int err;
+ const unsigned int flags = msg->msg_flags;
BT_DBG("sock %p sk %p", sock, sk);
- if (msg->msg_flags & MSG_OOB)
+ if (flags & MSG_OOB)
return -EOPNOTSUPP;
- if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE|
- MSG_CMSG_COMPAT))
+ if (flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL | MSG_ERRQUEUE | MSG_CMSG_COMPAT))
return -EINVAL;
- if (len < 4 || len > HCI_MAX_FRAME_SIZE)
+ if (len < 4 || len > hci_pi(sk)->mtu)
return -EINVAL;
+ skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
lock_sock(sk);
switch (hci_pi(sk)->channel) {
@@ -1744,39 +1724,30 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
break;
case HCI_CHANNEL_MONITOR:
err = -EOPNOTSUPP;
- goto done;
+ goto drop;
case HCI_CHANNEL_LOGGING:
- err = hci_logging_frame(sk, msg, len);
- goto done;
+ err = hci_logging_frame(sk, skb, flags);
+ goto drop;
default:
mutex_lock(&mgmt_chan_list_lock);
chan = __hci_mgmt_chan_find(hci_pi(sk)->channel);
if (chan)
- err = hci_mgmt_cmd(chan, sk, msg, len);
+ err = hci_mgmt_cmd(chan, sk, skb);
else
err = -EINVAL;
mutex_unlock(&mgmt_chan_list_lock);
- goto done;
+ goto drop;
}
hdev = hci_hdev_from_sock(sk);
if (IS_ERR(hdev)) {
err = PTR_ERR(hdev);
- goto done;
+ goto drop;
}
if (!test_bit(HCI_UP, &hdev->flags)) {
err = -ENETDOWN;
- goto done;
- }
-
- skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
- if (!skb)
- goto done;
-
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- err = -EFAULT;
goto drop;
}
@@ -1857,8 +1828,8 @@ drop:
goto done;
}
-static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int len)
+static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int len)
{
struct hci_ufilter uf = { .opcode = 0 };
struct sock *sk = sock->sk;
@@ -1866,9 +1837,6 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
BT_DBG("sk %p, opt %d", sk, optname);
- if (level != SOL_HCI)
- return -ENOPROTOOPT;
-
lock_sock(sk);
if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) {
@@ -1943,18 +1911,63 @@ done:
return err;
}
-static int hci_sock_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int len)
{
- struct hci_ufilter uf;
struct sock *sk = sock->sk;
- int len, opt, err = 0;
+ int err = 0, opt = 0;
BT_DBG("sk %p, opt %d", sk, optname);
- if (level != SOL_HCI)
+ if (level == SOL_HCI)
+ return hci_sock_setsockopt_old(sock, level, optname, optval,
+ len);
+
+ if (level != SOL_BLUETOOTH)
return -ENOPROTOOPT;
+ lock_sock(sk);
+
+ switch (optname) {
+ case BT_SNDMTU:
+ case BT_RCVMTU:
+ switch (hci_pi(sk)->channel) {
+ /* Don't allow changing MTU for channels that are meant for HCI
+ * traffic only.
+ */
+ case HCI_CHANNEL_RAW:
+ case HCI_CHANNEL_USER:
+ err = -ENOPROTOOPT;
+ goto done;
+ }
+
+ if (copy_from_sockptr(&opt, optval, sizeof(u16))) {
+ err = -EFAULT;
+ break;
+ }
+
+ hci_pi(sk)->mtu = opt;
+ break;
+
+ default:
+ err = -ENOPROTOOPT;
+ break;
+ }
+
+done:
+ release_sock(sk);
+ return err;
+}
+
+static int hci_sock_getsockopt_old(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct hci_ufilter uf;
+ struct sock *sk = sock->sk;
+ int len, opt, err = 0;
+
+ BT_DBG("sk %p, opt %d", sk, optname);
+
if (get_user(len, optlen))
return -EFAULT;
@@ -2012,6 +2025,39 @@ done:
return err;
}
+static int hci_sock_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct sock *sk = sock->sk;
+ int err = 0;
+
+ BT_DBG("sk %p, opt %d", sk, optname);
+
+ if (level == SOL_HCI)
+ return hci_sock_getsockopt_old(sock, level, optname, optval,
+ optlen);
+
+ if (level != SOL_BLUETOOTH)
+ return -ENOPROTOOPT;
+
+ lock_sock(sk);
+
+ switch (optname) {
+ case BT_SNDMTU:
+ case BT_RCVMTU:
+ if (put_user(hci_pi(sk)->mtu, (u16 __user *)optval))
+ err = -EFAULT;
+ break;
+
+ default:
+ err = -ENOPROTOOPT;
+ break;
+ }
+
+ release_sock(sk);
+ return err;
+}
+
static const struct proto_ops hci_sock_ops = {
.family = PF_BLUETOOTH,
.owner = THIS_MODULE,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 77ba68209dbd..4f8f37599962 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7902,7 +7902,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
dst_type = ADDR_LE_DEV_RANDOM;
if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
- hcon = hci_connect_le(hdev, dst, dst_type,
+ hcon = hci_connect_le(hdev, dst, dst_type, false,
chan->sec_level,
HCI_LE_CONN_TIMEOUT,
HCI_ROLE_SLAVE, NULL);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index c99d65ef13b1..160c016a5dfb 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1508,6 +1508,9 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
+ if (!sk)
+ return;
+
l2cap_sock_kill(sk);
}
@@ -1516,6 +1519,9 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
struct sock *sk = chan->data;
struct sock *parent;
+ if (!sk)
+ return;
+
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
/* This callback can be called both for server (BT_LISTEN)
@@ -1707,8 +1713,10 @@ static void l2cap_sock_destruct(struct sock *sk)
{
BT_DBG("sk %p", sk);
- if (l2cap_pi(sk)->chan)
+ if (l2cap_pi(sk)->chan) {
+ l2cap_pi(sk)->chan->data = NULL;
l2cap_chan_put(l2cap_pi(sk)->chan);
+ }
if (l2cap_pi(sk)->rx_busy_skb) {
kfree_skb(l2cap_pi(sk)->rx_busy_skb);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index cea01e275f1e..3e5283607b97 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,6 +38,7 @@
#include "mgmt_util.h"
#include "mgmt_config.h"
#include "msft.h"
+#include "eir.h"
#define MGMT_VERSION 1
#define MGMT_REVISION 21
@@ -3791,6 +3792,18 @@ static const u8 debug_uuid[16] = {
};
#endif
+/* 330859bc-7506-492d-9370-9a6f0614037f */
+static const u8 quality_report_uuid[16] = {
+ 0x7f, 0x03, 0x14, 0x06, 0x6f, 0x9a, 0x70, 0x93,
+ 0x2d, 0x49, 0x06, 0x75, 0xbc, 0x59, 0x08, 0x33,
+};
+
+/* a6695ace-ee7f-4fb9-881a-5fac66c629af */
+static const u8 offload_codecs_uuid[16] = {
+ 0xaf, 0x29, 0xc6, 0x66, 0xac, 0x5f, 0x1a, 0x88,
+ 0xb9, 0x4f, 0x7f, 0xee, 0xce, 0x5a, 0x69, 0xa6,
+};
+
/* 671b10b5-42c0-4696-9227-eb28d1b049d6 */
static const u8 simult_central_periph_uuid[16] = {
0xd6, 0x49, 0xb0, 0xd1, 0x28, 0xeb, 0x27, 0x92,
@@ -3806,7 +3819,7 @@ static const u8 rpa_resolution_uuid[16] = {
static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
void *data, u16 data_len)
{
- char buf[62]; /* Enough space for 3 features */
+ char buf[102]; /* Enough space for 5 features: 2 + 20 * 5 */
struct mgmt_rp_read_exp_features_info *rp = (void *)buf;
u16 idx = 0;
u32 flags;
@@ -3850,6 +3863,28 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
idx++;
}
+ if (hdev && hdev->set_quality_report) {
+ if (hci_dev_test_flag(hdev, HCI_QUALITY_REPORT))
+ flags = BIT(0);
+ else
+ flags = 0;
+
+ memcpy(rp->features[idx].uuid, quality_report_uuid, 16);
+ rp->features[idx].flags = cpu_to_le32(flags);
+ idx++;
+ }
+
+ if (hdev && hdev->get_data_path_id) {
+ if (hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED))
+ flags = BIT(0);
+ else
+ flags = 0;
+
+ memcpy(rp->features[idx].uuid, offload_codecs_uuid, 16);
+ rp->features[idx].flags = cpu_to_le32(flags);
+ idx++;
+ }
+
rp->feature_count = cpu_to_le16(idx);
/* After reading the experimental features information, enable
@@ -3892,150 +3927,341 @@ static int exp_debug_feature_changed(bool enabled, struct sock *skip)
}
#endif
-static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
- void *data, u16 data_len)
+static int exp_quality_report_feature_changed(bool enabled, struct sock *skip)
{
- struct mgmt_cp_set_exp_feature *cp = data;
- struct mgmt_rp_set_exp_feature rp;
+ struct mgmt_ev_exp_feature_changed ev;
- bt_dev_dbg(hdev, "sock %p", sk);
+ memset(&ev, 0, sizeof(ev));
+ memcpy(ev.uuid, quality_report_uuid, 16);
+ ev.flags = cpu_to_le32(enabled ? BIT(0) : 0);
+
+ return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, NULL,
+ &ev, sizeof(ev),
+ HCI_MGMT_EXP_FEATURE_EVENTS, skip);
+}
+
+#define EXP_FEAT(_uuid, _set_func) \
+{ \
+ .uuid = _uuid, \
+ .set_func = _set_func, \
+}
+
+/* The zero key uuid is special. Multiple exp features are set through it. */
+static int set_zero_key_func(struct sock *sk, struct hci_dev *hdev,
+ struct mgmt_cp_set_exp_feature *cp, u16 data_len)
+{
+ struct mgmt_rp_set_exp_feature rp;
- if (!memcmp(cp->uuid, ZERO_KEY, 16)) {
- memset(rp.uuid, 0, 16);
- rp.flags = cpu_to_le32(0);
+ memset(rp.uuid, 0, 16);
+ rp.flags = cpu_to_le32(0);
#ifdef CONFIG_BT_FEATURE_DEBUG
- if (!hdev) {
- bool changed = bt_dbg_get();
+ if (!hdev) {
+ bool changed = bt_dbg_get();
- bt_dbg_set(false);
+ bt_dbg_set(false);
- if (changed)
- exp_debug_feature_changed(false, sk);
- }
+ if (changed)
+ exp_debug_feature_changed(false, sk);
+ }
#endif
- if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) {
- bool changed = hci_dev_test_flag(hdev,
- HCI_ENABLE_LL_PRIVACY);
+ if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) {
+ bool changed = hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY);
- hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+ hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY);
- if (changed)
- exp_ll_privacy_feature_changed(false, hdev, sk);
- }
+ if (changed)
+ exp_ll_privacy_feature_changed(false, hdev, sk);
+ }
- hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
- return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
- MGMT_OP_SET_EXP_FEATURE, 0,
- &rp, sizeof(rp));
- }
+ return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE, 0,
+ &rp, sizeof(rp));
+}
#ifdef CONFIG_BT_FEATURE_DEBUG
- if (!memcmp(cp->uuid, debug_uuid, 16)) {
- bool val, changed;
- int err;
+static int set_debug_func(struct sock *sk, struct hci_dev *hdev,
+ struct mgmt_cp_set_exp_feature *cp, u16 data_len)
+{
+ struct mgmt_rp_set_exp_feature rp;
- /* Command requires to use the non-controller index */
- if (hdev)
- return mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_SET_EXP_FEATURE,
- MGMT_STATUS_INVALID_INDEX);
+ bool val, changed;
+ int err;
- /* Parameters are limited to a single octet */
- if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
- return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
- MGMT_OP_SET_EXP_FEATURE,
- MGMT_STATUS_INVALID_PARAMS);
+ /* Command requires to use the non-controller index */
+ if (hdev)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_INDEX);
- /* Only boolean on/off is supported */
- if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
- return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
- MGMT_OP_SET_EXP_FEATURE,
- MGMT_STATUS_INVALID_PARAMS);
+ /* Parameters are limited to a single octet */
+ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
- val = !!cp->param[0];
- changed = val ? !bt_dbg_get() : bt_dbg_get();
- bt_dbg_set(val);
+ /* Only boolean on/off is supported */
+ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
- memcpy(rp.uuid, debug_uuid, 16);
- rp.flags = cpu_to_le32(val ? BIT(0) : 0);
+ val = !!cp->param[0];
+ changed = val ? !bt_dbg_get() : bt_dbg_get();
+ bt_dbg_set(val);
- hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+ memcpy(rp.uuid, debug_uuid, 16);
+ rp.flags = cpu_to_le32(val ? BIT(0) : 0);
- err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE,
- MGMT_OP_SET_EXP_FEATURE, 0,
- &rp, sizeof(rp));
+ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
- if (changed)
- exp_debug_feature_changed(val, sk);
+ err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE, 0,
+ &rp, sizeof(rp));
- return err;
- }
+ if (changed)
+ exp_debug_feature_changed(val, sk);
+
+ return err;
+}
#endif
- if (!memcmp(cp->uuid, rpa_resolution_uuid, 16)) {
- bool val, changed;
- int err;
- u32 flags;
+static int set_rpa_resolution_func(struct sock *sk, struct hci_dev *hdev,
+ struct mgmt_cp_set_exp_feature *cp,
+ u16 data_len)
+{
+ struct mgmt_rp_set_exp_feature rp;
+ bool val, changed;
+ int err;
+ u32 flags;
+
+ /* Command requires to use the controller index */
+ if (!hdev)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_INDEX);
- /* Command requires to use the controller index */
- if (!hdev)
- return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
- MGMT_OP_SET_EXP_FEATURE,
- MGMT_STATUS_INVALID_INDEX);
+ /* Changes can only be made when controller is powered down */
+ if (hdev_is_powered(hdev))
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_REJECTED);
- /* Changes can only be made when controller is powered down */
- if (hdev_is_powered(hdev))
- return mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_SET_EXP_FEATURE,
- MGMT_STATUS_REJECTED);
+ /* Parameters are limited to a single octet */
+ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
- /* Parameters are limited to a single octet */
- if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
- return mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_SET_EXP_FEATURE,
- MGMT_STATUS_INVALID_PARAMS);
+ /* Only boolean on/off is supported */
+ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
- /* Only boolean on/off is supported */
- if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
- return mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_SET_EXP_FEATURE,
- MGMT_STATUS_INVALID_PARAMS);
+ val = !!cp->param[0];
- val = !!cp->param[0];
+ if (val) {
+ changed = !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+ hci_dev_set_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+ hci_dev_clear_flag(hdev, HCI_ADVERTISING);
- if (val) {
- changed = !hci_dev_test_flag(hdev,
- HCI_ENABLE_LL_PRIVACY);
- hci_dev_set_flag(hdev, HCI_ENABLE_LL_PRIVACY);
- hci_dev_clear_flag(hdev, HCI_ADVERTISING);
+ /* Enable LL privacy + supported settings changed */
+ flags = BIT(0) | BIT(1);
+ } else {
+ changed = hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+ hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY);
- /* Enable LL privacy + supported settings changed */
- flags = BIT(0) | BIT(1);
- } else {
- changed = hci_dev_test_flag(hdev,
- HCI_ENABLE_LL_PRIVACY);
- hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+ /* Disable LL privacy + supported settings changed */
+ flags = BIT(1);
+ }
- /* Disable LL privacy + supported settings changed */
- flags = BIT(1);
+ memcpy(rp.uuid, rpa_resolution_uuid, 16);
+ rp.flags = cpu_to_le32(flags);
+
+ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE, 0,
+ &rp, sizeof(rp));
+
+ if (changed)
+ exp_ll_privacy_feature_changed(val, hdev, sk);
+
+ return err;
+}
+
+static int set_quality_report_func(struct sock *sk, struct hci_dev *hdev,
+ struct mgmt_cp_set_exp_feature *cp,
+ u16 data_len)
+{
+ struct mgmt_rp_set_exp_feature rp;
+ bool val, changed;
+ int err;
+
+ /* Command requires to use a valid controller index */
+ if (!hdev)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_INDEX);
+
+ /* Parameters are limited to a single octet */
+ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ /* Only boolean on/off is supported */
+ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ hci_req_sync_lock(hdev);
+
+ val = !!cp->param[0];
+ changed = (val != hci_dev_test_flag(hdev, HCI_QUALITY_REPORT));
+
+ if (!hdev->set_quality_report) {
+ err = mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_NOT_SUPPORTED);
+ goto unlock_quality_report;
+ }
+
+ if (changed) {
+ err = hdev->set_quality_report(hdev, val);
+ if (err) {
+ err = mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_FAILED);
+ goto unlock_quality_report;
}
+ if (val)
+ hci_dev_set_flag(hdev, HCI_QUALITY_REPORT);
+ else
+ hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT);
+ }
- memcpy(rp.uuid, rpa_resolution_uuid, 16);
- rp.flags = cpu_to_le32(flags);
+ bt_dev_dbg(hdev, "quality report enable %d changed %d", val, changed);
- hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+ memcpy(rp.uuid, quality_report_uuid, 16);
+ rp.flags = cpu_to_le32(val ? BIT(0) : 0);
+ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE, 0,
+ &rp, sizeof(rp));
- err = mgmt_cmd_complete(sk, hdev->id,
- MGMT_OP_SET_EXP_FEATURE, 0,
- &rp, sizeof(rp));
+ if (changed)
+ exp_quality_report_feature_changed(val, sk);
- if (changed)
- exp_ll_privacy_feature_changed(val, hdev, sk);
+unlock_quality_report:
+ hci_req_sync_unlock(hdev);
+ return err;
+}
- return err;
+static int exp_offload_codec_feature_changed(bool enabled, struct sock *skip)
+{
+ struct mgmt_ev_exp_feature_changed ev;
+
+ memset(&ev, 0, sizeof(ev));
+ memcpy(ev.uuid, offload_codecs_uuid, 16);
+ ev.flags = cpu_to_le32(enabled ? BIT(0) : 0);
+
+ return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, NULL,
+ &ev, sizeof(ev),
+ HCI_MGMT_EXP_FEATURE_EVENTS, skip);
+}
+
+static int set_offload_codec_func(struct sock *sk, struct hci_dev *hdev,
+ struct mgmt_cp_set_exp_feature *cp,
+ u16 data_len)
+{
+ bool val, changed;
+ int err;
+ struct mgmt_rp_set_exp_feature rp;
+
+ /* Command requires to use a valid controller index */
+ if (!hdev)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_INDEX);
+
+ /* Parameters are limited to a single octet */
+ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ /* Only boolean on/off is supported */
+ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ val = !!cp->param[0];
+ changed = (val != hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED));
+
+ if (!hdev->get_data_path_id) {
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_NOT_SUPPORTED);
+ }
+
+ if (changed) {
+ if (val)
+ hci_dev_set_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED);
+ else
+ hci_dev_clear_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED);
+ }
+
+ bt_dev_info(hdev, "offload codecs enable %d changed %d",
+ val, changed);
+
+ memcpy(rp.uuid, offload_codecs_uuid, 16);
+ rp.flags = cpu_to_le32(val ? BIT(0) : 0);
+ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE, 0,
+ &rp, sizeof(rp));
+
+ if (changed)
+ exp_offload_codec_feature_changed(val, sk);
+
+ return err;
+}
+
+static const struct mgmt_exp_feature {
+ const u8 *uuid;
+ int (*set_func)(struct sock *sk, struct hci_dev *hdev,
+ struct mgmt_cp_set_exp_feature *cp, u16 data_len);
+} exp_features[] = {
+ EXP_FEAT(ZERO_KEY, set_zero_key_func),
+#ifdef CONFIG_BT_FEATURE_DEBUG
+ EXP_FEAT(debug_uuid, set_debug_func),
+#endif
+ EXP_FEAT(rpa_resolution_uuid, set_rpa_resolution_func),
+ EXP_FEAT(quality_report_uuid, set_quality_report_func),
+ EXP_FEAT(offload_codecs_uuid, set_offload_codec_func),
+
+ /* end with a null feature */
+ EXP_FEAT(NULL, NULL)
+};
+
+static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 data_len)
+{
+ struct mgmt_cp_set_exp_feature *cp = data;
+ size_t i = 0;
+
+ bt_dev_dbg(hdev, "sock %p", sk);
+
+ for (i = 0; exp_features[i].uuid; i++) {
+ if (!memcmp(cp->uuid, exp_features[i].uuid, 16))
+ return exp_features[i].set_func(sk, hdev, cp, data_len);
}
return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
@@ -7315,6 +7541,11 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev,
if (!rp)
return -ENOMEM;
+ if (!status && !lmp_ssp_capable(hdev)) {
+ status = MGMT_STATUS_NOT_SUPPORTED;
+ eir_len = 0;
+ }
+
if (status)
goto complete;
@@ -7526,7 +7757,7 @@ static u8 calculate_name_len(struct hci_dev *hdev)
{
u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3];
- return append_local_name(hdev, buf, 0);
+ return eir_append_local_name(hdev, buf, 0);
}
static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags,
@@ -8222,7 +8453,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
* advertising.
*/
if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
MGMT_STATUS_NOT_SUPPORTED);
hci_dev_lock(hdev);
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index b4bfae41e8a5..255cffa554ee 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -94,11 +94,14 @@ struct msft_data {
__u16 pending_add_handle;
__u16 pending_remove_handle;
__u8 reregistering;
+ __u8 suspending;
__u8 filter_enabled;
};
static int __msft_add_monitor_pattern(struct hci_dev *hdev,
struct adv_monitor *monitor);
+static int __msft_remove_monitor(struct hci_dev *hdev,
+ struct adv_monitor *monitor, u16 handle);
bool msft_monitor_supported(struct hci_dev *hdev)
{
@@ -154,7 +157,7 @@ failed:
}
/* This function requires the caller holds hdev->lock */
-static void reregister_monitor_on_restart(struct hci_dev *hdev, int handle)
+static void reregister_monitor(struct hci_dev *hdev, int handle)
{
struct adv_monitor *monitor;
struct msft_data *msft = hdev->msft_data;
@@ -182,31 +185,102 @@ static void reregister_monitor_on_restart(struct hci_dev *hdev, int handle)
}
}
+/* This function requires the caller holds hdev->lock */
+static void remove_monitor_on_suspend(struct hci_dev *hdev, int handle)
+{
+ struct adv_monitor *monitor;
+ struct msft_data *msft = hdev->msft_data;
+ int err;
+
+ while (1) {
+ monitor = idr_get_next(&hdev->adv_monitors_idr, &handle);
+ if (!monitor) {
+ /* All monitors have been removed */
+ msft->suspending = false;
+ hci_update_background_scan(hdev);
+ return;
+ }
+
+ msft->pending_remove_handle = (u16)handle;
+ err = __msft_remove_monitor(hdev, monitor, handle);
+
+ /* If success, return and wait for monitor removed callback */
+ if (!err)
+ return;
+
+ /* Otherwise free the monitor and keep removing */
+ hci_free_adv_monitor(hdev, monitor);
+ handle++;
+ }
+}
+
+/* This function requires the caller holds hdev->lock */
+void msft_suspend(struct hci_dev *hdev)
+{
+ struct msft_data *msft = hdev->msft_data;
+
+ if (!msft)
+ return;
+
+ if (msft_monitor_supported(hdev)) {
+ msft->suspending = true;
+ /* Quitely remove all monitors on suspend to avoid waking up
+ * the system.
+ */
+ remove_monitor_on_suspend(hdev, 0);
+ }
+}
+
+/* This function requires the caller holds hdev->lock */
+void msft_resume(struct hci_dev *hdev)
+{
+ struct msft_data *msft = hdev->msft_data;
+
+ if (!msft)
+ return;
+
+ if (msft_monitor_supported(hdev)) {
+ msft->reregistering = true;
+ /* Monitors are removed on suspend, so we need to add all
+ * monitors on resume.
+ */
+ reregister_monitor(hdev, 0);
+ }
+}
+
void msft_do_open(struct hci_dev *hdev)
{
- struct msft_data *msft;
+ struct msft_data *msft = hdev->msft_data;
if (hdev->msft_opcode == HCI_OP_NOP)
return;
+ if (!msft) {
+ bt_dev_err(hdev, "MSFT extension not registered");
+ return;
+ }
+
bt_dev_dbg(hdev, "Initialize MSFT extension");
- msft = kzalloc(sizeof(*msft), GFP_KERNEL);
- if (!msft)
- return;
+ /* Reset existing MSFT data before re-reading */
+ kfree(msft->evt_prefix);
+ msft->evt_prefix = NULL;
+ msft->evt_prefix_len = 0;
+ msft->features = 0;
if (!read_supported_features(hdev, msft)) {
+ hdev->msft_data = NULL;
kfree(msft);
return;
}
- INIT_LIST_HEAD(&msft->handle_map);
- hdev->msft_data = msft;
-
if (msft_monitor_supported(hdev)) {
msft->reregistering = true;
msft_set_filter_enable(hdev, true);
- reregister_monitor_on_restart(hdev, 0);
+ /* Monitors get removed on power off, so we need to explicitly
+ * tell the controller to re-monitor.
+ */
+ reregister_monitor(hdev, 0);
}
}
@@ -221,8 +295,9 @@ void msft_do_close(struct hci_dev *hdev)
bt_dev_dbg(hdev, "Cleanup of MSFT extension");
- hdev->msft_data = NULL;
-
+ /* The controller will silently remove all monitors on power off.
+ * Therefore, remove handle_data mapping and reset monitor state.
+ */
list_for_each_entry_safe(handle_data, tmp, &msft->handle_map, list) {
monitor = idr_find(&hdev->adv_monitors_idr,
handle_data->mgmt_handle);
@@ -233,6 +308,34 @@ void msft_do_close(struct hci_dev *hdev)
list_del(&handle_data->list);
kfree(handle_data);
}
+}
+
+void msft_register(struct hci_dev *hdev)
+{
+ struct msft_data *msft = NULL;
+
+ bt_dev_dbg(hdev, "Register MSFT extension");
+
+ msft = kzalloc(sizeof(*msft), GFP_KERNEL);
+ if (!msft) {
+ bt_dev_err(hdev, "Failed to register MSFT extension");
+ return;
+ }
+
+ INIT_LIST_HEAD(&msft->handle_map);
+ hdev->msft_data = msft;
+}
+
+void msft_unregister(struct hci_dev *hdev)
+{
+ struct msft_data *msft = hdev->msft_data;
+
+ if (!msft)
+ return;
+
+ bt_dev_dbg(hdev, "Unregister MSFT extension");
+
+ hdev->msft_data = NULL;
kfree(msft->evt_prefix);
kfree(msft);
@@ -345,8 +448,7 @@ unlock:
/* If in restart/reregister sequence, keep registering. */
if (msft->reregistering)
- reregister_monitor_on_restart(hdev,
- msft->pending_add_handle + 1);
+ reregister_monitor(hdev, msft->pending_add_handle + 1);
hci_dev_unlock(hdev);
@@ -383,13 +485,25 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
if (handle_data) {
monitor = idr_find(&hdev->adv_monitors_idr,
handle_data->mgmt_handle);
- if (monitor)
+
+ if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED)
+ monitor->state = ADV_MONITOR_STATE_REGISTERED;
+
+ /* Do not free the monitor if it is being removed due to
+ * suspend. It will be re-monitored on resume.
+ */
+ if (monitor && !msft->suspending)
hci_free_adv_monitor(hdev, monitor);
list_del(&handle_data->list);
kfree(handle_data);
}
+ /* If in suspend/remove sequence, keep removing. */
+ if (msft->suspending)
+ remove_monitor_on_suspend(hdev,
+ msft->pending_remove_handle + 1);
+
/* If remove all monitors is required, we need to continue the process
* here because the earlier it was paused when waiting for the
* response from controller.
@@ -408,7 +522,8 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
hci_dev_unlock(hdev);
done:
- hci_remove_adv_monitor_complete(hdev, status);
+ if (!msft->suspending)
+ hci_remove_adv_monitor_complete(hdev, status);
}
static void msft_le_set_advertisement_filter_enable_cb(struct hci_dev *hdev,
@@ -541,15 +656,15 @@ int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor)
if (!msft)
return -EOPNOTSUPP;
- if (msft->reregistering)
+ if (msft->reregistering || msft->suspending)
return -EBUSY;
return __msft_add_monitor_pattern(hdev, monitor);
}
/* This function requires the caller holds hdev->lock */
-int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
- u16 handle)
+static int __msft_remove_monitor(struct hci_dev *hdev,
+ struct adv_monitor *monitor, u16 handle)
{
struct msft_cp_le_cancel_monitor_advertisement cp;
struct msft_monitor_advertisement_handle_data *handle_data;
@@ -557,12 +672,6 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
struct msft_data *msft = hdev->msft_data;
int err = 0;
- if (!msft)
- return -EOPNOTSUPP;
-
- if (msft->reregistering)
- return -EBUSY;
-
handle_data = msft_find_handle_data(hdev, monitor->handle, true);
/* If no matched handle, just remove without telling controller */
@@ -582,6 +691,21 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
return err;
}
+/* This function requires the caller holds hdev->lock */
+int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
+ u16 handle)
+{
+ struct msft_data *msft = hdev->msft_data;
+
+ if (!msft)
+ return -EOPNOTSUPP;
+
+ if (msft->reregistering || msft->suspending)
+ return -EBUSY;
+
+ return __msft_remove_monitor(hdev, monitor, handle);
+}
+
void msft_req_add_set_filter_enable(struct hci_request *req, bool enable)
{
struct hci_dev *hdev = req->hdev;
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
index 6e56d94b88d8..59c6e081c789 100644
--- a/net/bluetooth/msft.h
+++ b/net/bluetooth/msft.h
@@ -13,6 +13,8 @@
#if IS_ENABLED(CONFIG_BT_MSFTEXT)
bool msft_monitor_supported(struct hci_dev *hdev);
+void msft_register(struct hci_dev *hdev);
+void msft_unregister(struct hci_dev *hdev);
void msft_do_open(struct hci_dev *hdev);
void msft_do_close(struct hci_dev *hdev);
void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb);
@@ -22,6 +24,8 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
u16 handle);
void msft_req_add_set_filter_enable(struct hci_request *req, bool enable);
int msft_set_filter_enable(struct hci_dev *hdev, bool enable);
+void msft_suspend(struct hci_dev *hdev);
+void msft_resume(struct hci_dev *hdev);
bool msft_curve_validity(struct hci_dev *hdev);
#else
@@ -31,6 +35,8 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev)
return false;
}
+static inline void msft_register(struct hci_dev *hdev) {}
+static inline void msft_unregister(struct hci_dev *hdev) {}
static inline void msft_do_open(struct hci_dev *hdev) {}
static inline void msft_do_close(struct hci_dev *hdev) {}
static inline void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) {}
@@ -55,6 +61,9 @@ static inline int msft_set_filter_enable(struct hci_dev *hdev, bool enable)
return -EOPNOTSUPP;
}
+static inline void msft_suspend(struct hci_dev *hdev) {}
+static inline void msft_resume(struct hci_dev *hdev) {}
+
static inline bool msft_curve_validity(struct hci_dev *hdev)
{
return false;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index f2bacb464ccf..7324764384b6 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -549,22 +549,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel)
return dlc;
}
+static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag)
+{
+ int len = frag->len;
+
+ BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
+
+ if (len > d->mtu)
+ return -EINVAL;
+
+ rfcomm_make_uih(frag, d->addr);
+ __skb_queue_tail(&d->tx_queue, frag);
+
+ return len;
+}
+
int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
{
- int len = skb->len;
+ unsigned long flags;
+ struct sk_buff *frag, *next;
+ int len;
if (d->state != BT_CONNECTED)
return -ENOTCONN;
- BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
+ frag = skb_shinfo(skb)->frag_list;
+ skb_shinfo(skb)->frag_list = NULL;
- if (len > d->mtu)
- return -EINVAL;
+ /* Queue all fragments atomically. */
+ spin_lock_irqsave(&d->tx_queue.lock, flags);
- rfcomm_make_uih(skb, d->addr);
- skb_queue_tail(&d->tx_queue, skb);
+ len = rfcomm_dlc_send_frag(d, skb);
+ if (len < 0 || !frag)
+ goto unlock;
+
+ for (; frag; frag = next) {
+ int ret;
+
+ next = frag->next;
+
+ ret = rfcomm_dlc_send_frag(d, frag);
+ if (ret < 0) {
+ kfree_skb(frag);
+ goto unlock;
+ }
+
+ len += ret;
+ }
+
+unlock:
+ spin_unlock_irqrestore(&d->tx_queue.lock, flags);
- if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags))
+ if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags))
rfcomm_schedule();
return len;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 2c95bb58f901..4bf4ea6cbb5e 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -575,46 +575,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
sent = bt_sock_wait_ready(sk, msg->msg_flags);
- if (sent)
- goto done;
-
- while (len) {
- size_t size = min_t(size_t, len, d->mtu);
- int err;
-
- skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE,
- msg->msg_flags & MSG_DONTWAIT, &err);
- if (!skb) {
- if (sent == 0)
- sent = err;
- break;
- }
- skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
-
- err = memcpy_from_msg(skb_put(skb, size), msg, size);
- if (err) {
- kfree_skb(skb);
- if (sent == 0)
- sent = err;
- break;
- }
- skb->priority = sk->sk_priority;
+ release_sock(sk);
- err = rfcomm_dlc_send(d, skb);
- if (err < 0) {
- kfree_skb(skb);
- if (sent == 0)
- sent = err;
- break;
- }
+ if (sent)
+ return sent;
- sent += size;
- len -= size;
- }
+ skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE,
+ RFCOMM_SKB_TAIL_RESERVE);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
-done:
- release_sock(sk);
+ sent = rfcomm_dlc_send(d, skb);
+ if (sent < 0)
+ kfree_skb(skb);
return sent;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 98a881586512..8eabf41b2993 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -69,6 +69,7 @@ struct sco_pinfo {
__u32 flags;
__u16 setting;
__u8 cmsg_mask;
+ struct bt_codec codec;
struct sco_conn *conn;
};
@@ -133,6 +134,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
return NULL;
spin_lock_init(&conn->lock);
+ INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
hcon->sco_data = conn;
conn->hcon = hcon;
@@ -187,20 +189,21 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
/* Kill socket */
sco_conn_lock(conn);
sk = conn->sk;
+ if (sk)
+ sock_hold(sk);
sco_conn_unlock(conn);
if (sk) {
- sock_hold(sk);
lock_sock(sk);
sco_sock_clear_timer(sk);
sco_chan_del(sk, err);
release_sock(sk);
sock_put(sk);
-
- /* Ensure no more work items will run before freeing conn. */
- cancel_delayed_work_sync(&conn->timeout_work);
}
+ /* Ensure no more work items will run before freeing conn. */
+ cancel_delayed_work_sync(&conn->timeout_work);
+
hcon->sco_data = NULL;
kfree(conn);
}
@@ -213,8 +216,6 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
sco_pi(sk)->conn = conn;
conn->sk = sk;
- INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
-
if (parent)
bt_accept_enqueue(parent, sk, true);
}
@@ -252,7 +253,7 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk)
return -EOPNOTSUPP;
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
- sco_pi(sk)->setting);
+ sco_pi(sk)->setting, &sco_pi(sk)->codec);
if (IS_ERR(hcon))
return PTR_ERR(hcon);
@@ -280,11 +281,10 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk)
return err;
}
-static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
+static int sco_send_frame(struct sock *sk, struct sk_buff *skb)
{
struct sco_conn *conn = sco_pi(sk)->conn;
- struct sk_buff *skb;
- int err;
+ int len = skb->len;
/* Check outgoing MTU */
if (len > conn->mtu)
@@ -292,15 +292,6 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
BT_DBG("sk %p len %d", sk, len);
- skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
- if (!skb)
- return err;
-
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- kfree_skb(skb);
- return -EFAULT;
- }
-
hci_send_sco(conn->hcon, skb);
return len;
@@ -444,6 +435,7 @@ static void __sco_sock_close(struct sock *sk)
sock_set_flag(sk, SOCK_ZAPPED);
break;
}
+
}
/* Must be called on unlocked socket. */
@@ -504,6 +496,10 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = BT_OPEN;
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
+ sco_pi(sk)->codec.id = BT_CODEC_CVSD;
+ sco_pi(sk)->codec.cid = 0xffff;
+ sco_pi(sk)->codec.vid = 0xffff;
+ sco_pi(sk)->codec.data_path = 0x00;
bt_sock_link(&sco_sk_list, sk);
return sk;
@@ -725,6 +721,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
+ struct sk_buff *skb;
int err;
BT_DBG("sock %p, sk %p", sock, sk);
@@ -736,14 +733,21 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
+ skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
lock_sock(sk);
if (sk->sk_state == BT_CONNECTED)
- err = sco_send_frame(sk, msg, len);
+ err = sco_send_frame(sk, skb);
else
err = -ENOTCONN;
release_sock(sk);
+
+ if (err < 0)
+ kfree_skb(skb);
return err;
}
@@ -825,6 +829,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
int len, err = 0;
struct bt_voice voice;
u32 opt;
+ struct bt_codecs *codecs;
+ struct hci_dev *hdev;
+ __u8 buffer[255];
BT_DBG("sk %p", sk);
@@ -872,6 +879,16 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
}
sco_pi(sk)->setting = voice.setting;
+ hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src,
+ BDADDR_BREDR);
+ if (!hdev) {
+ err = -EBADFD;
+ break;
+ }
+ if (enhanced_sco_capable(hdev) &&
+ voice.setting == BT_VOICE_TRANSPARENT)
+ sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT;
+ hci_dev_put(hdev);
break;
case BT_PKT_STATUS:
@@ -886,6 +903,57 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
sco_pi(sk)->cmsg_mask &= SCO_CMSG_PKT_STATUS;
break;
+ case BT_CODEC:
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
+ sk->sk_state != BT_CONNECT2) {
+ err = -EINVAL;
+ break;
+ }
+
+ hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src,
+ BDADDR_BREDR);
+ if (!hdev) {
+ err = -EBADFD;
+ break;
+ }
+
+ if (!hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) {
+ hci_dev_put(hdev);
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ if (!hdev->get_data_path_id) {
+ hci_dev_put(hdev);
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ if (optlen < sizeof(struct bt_codecs) ||
+ optlen > sizeof(buffer)) {
+ hci_dev_put(hdev);
+ err = -EINVAL;
+ break;
+ }
+
+ if (copy_from_sockptr(buffer, optval, optlen)) {
+ hci_dev_put(hdev);
+ err = -EFAULT;
+ break;
+ }
+
+ codecs = (void *)buffer;
+
+ if (codecs->num_codecs > 1) {
+ hci_dev_put(hdev);
+ err = -EINVAL;
+ break;
+ }
+
+ sco_pi(sk)->codec = codecs->codecs[0];
+ hci_dev_put(hdev);
+ break;
+
default:
err = -ENOPROTOOPT;
break;
@@ -964,6 +1032,12 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
struct bt_voice voice;
u32 phys;
int pkt_status;
+ int buf_len;
+ struct codec_list *c;
+ u8 num_codecs, i, __user *ptr;
+ struct hci_dev *hdev;
+ struct hci_codec_caps *caps;
+ struct bt_codec codec;
BT_DBG("sk %p", sk);
@@ -1028,6 +1102,101 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
err = -EFAULT;
break;
+ case BT_CODEC:
+ num_codecs = 0;
+ buf_len = 0;
+
+ hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
+ if (!hdev) {
+ err = -EBADFD;
+ break;
+ }
+
+ if (!hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) {
+ hci_dev_put(hdev);
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ if (!hdev->get_data_path_id) {
+ hci_dev_put(hdev);
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ /* find total buffer size required to copy codec + caps */
+ hci_dev_lock(hdev);
+ list_for_each_entry(c, &hdev->local_codecs, list) {
+ if (c->transport != HCI_TRANSPORT_SCO_ESCO)
+ continue;
+ num_codecs++;
+ for (i = 0, caps = c->caps; i < c->num_caps; i++) {
+ buf_len += 1 + caps->len;
+ caps = (void *)&caps->data[caps->len];
+ }
+ buf_len += sizeof(struct bt_codec);
+ }
+ hci_dev_unlock(hdev);
+
+ buf_len += sizeof(struct bt_codecs);
+ if (buf_len > len) {
+ hci_dev_put(hdev);
+ err = -ENOBUFS;
+ break;
+ }
+ ptr = optval;
+
+ if (put_user(num_codecs, ptr)) {
+ hci_dev_put(hdev);
+ err = -EFAULT;
+ break;
+ }
+ ptr += sizeof(num_codecs);
+
+ /* Iterate all the codecs supported over SCO and populate
+ * codec data
+ */
+ hci_dev_lock(hdev);
+ list_for_each_entry(c, &hdev->local_codecs, list) {
+ if (c->transport != HCI_TRANSPORT_SCO_ESCO)
+ continue;
+
+ codec.id = c->id;
+ codec.cid = c->cid;
+ codec.vid = c->vid;
+ err = hdev->get_data_path_id(hdev, &codec.data_path);
+ if (err < 0)
+ break;
+ codec.num_caps = c->num_caps;
+ if (copy_to_user(ptr, &codec, sizeof(codec))) {
+ err = -EFAULT;
+ break;
+ }
+ ptr += sizeof(codec);
+
+ /* find codec capabilities data length */
+ len = 0;
+ for (i = 0, caps = c->caps; i < c->num_caps; i++) {
+ len += 1 + caps->len;
+ caps = (void *)&caps->data[caps->len];
+ }
+
+ /* copy codec capabilities data */
+ if (len && copy_to_user(ptr, c->caps, len)) {
+ err = -EFAULT;
+ break;
+ }
+ ptr += len;
+ }
+
+ if (!err && put_user(buf_len, optlen))
+ err = -EFAULT;
+
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
+
+ break;
+
default:
err = -ENOPROTOOPT;
break;
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
index 1c0a98d8c28f..1ebe270bde23 100644
--- a/net/bpf/Makefile
+++ b/net/bpf/Makefile
@@ -1,2 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_BPF_SYSCALL) := test_run.o
+ifeq ($(CONFIG_BPF_JIT),y)
+obj-$(CONFIG_BPF_SYSCALL) += bpf_dummy_struct_ops.o
+endif
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
new file mode 100644
index 000000000000..fbc896323bec
--- /dev/null
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021. Huawei Technologies Co., Ltd
+ */
+#include <linux/kernel.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+
+extern struct bpf_struct_ops bpf_bpf_dummy_ops;
+
+/* A common type for test_N with return value in bpf_dummy_ops */
+typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
+
+struct bpf_dummy_ops_test_args {
+ u64 args[MAX_BPF_FUNC_ARGS];
+ struct bpf_dummy_ops_state state;
+};
+
+static struct bpf_dummy_ops_test_args *
+dummy_ops_init_args(const union bpf_attr *kattr, unsigned int nr)
+{
+ __u32 size_in;
+ struct bpf_dummy_ops_test_args *args;
+ void __user *ctx_in;
+ void __user *u_state;
+
+ size_in = kattr->test.ctx_size_in;
+ if (size_in != sizeof(u64) * nr)
+ return ERR_PTR(-EINVAL);
+
+ args = kzalloc(sizeof(*args), GFP_KERNEL);
+ if (!args)
+ return ERR_PTR(-ENOMEM);
+
+ ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+ if (copy_from_user(args->args, ctx_in, size_in))
+ goto out;
+
+ /* args[0] is 0 means state argument of test_N will be NULL */
+ u_state = u64_to_user_ptr(args->args[0]);
+ if (u_state && copy_from_user(&args->state, u_state,
+ sizeof(args->state)))
+ goto out;
+
+ return args;
+out:
+ kfree(args);
+ return ERR_PTR(-EFAULT);
+}
+
+static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args)
+{
+ void __user *u_state;
+
+ u_state = u64_to_user_ptr(args->args[0]);
+ if (u_state && copy_to_user(u_state, &args->state, sizeof(args->state)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
+{
+ dummy_ops_test_ret_fn test = (void *)image;
+ struct bpf_dummy_ops_state *state = NULL;
+
+ /* state needs to be NULL if args[0] is 0 */
+ if (args->args[0])
+ state = &args->state;
+ return test(state, args->args[1], args->args[2],
+ args->args[3], args->args[4]);
+}
+
+int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
+ const struct btf_type *func_proto;
+ struct bpf_dummy_ops_test_args *args;
+ struct bpf_tramp_progs *tprogs;
+ void *image = NULL;
+ unsigned int op_idx;
+ int prog_ret;
+ int err;
+
+ if (prog->aux->attach_btf_id != st_ops->type_id)
+ return -EOPNOTSUPP;
+
+ func_proto = prog->aux->attach_func_proto;
+ args = dummy_ops_init_args(kattr, btf_type_vlen(func_proto));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
+
+ tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
+ if (!tprogs) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ image = bpf_jit_alloc_exec(PAGE_SIZE);
+ if (!image) {
+ err = -ENOMEM;
+ goto out;
+ }
+ set_vm_flush_reset_perms(image);
+
+ op_idx = prog->expected_attach_type;
+ err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
+ &st_ops->func_models[op_idx],
+ image, image + PAGE_SIZE);
+ if (err < 0)
+ goto out;
+
+ set_memory_ro((long)image, 1);
+ set_memory_x((long)image, 1);
+ prog_ret = dummy_ops_call_op(image, args);
+
+ err = dummy_ops_copy_args(args);
+ if (err)
+ goto out;
+ if (put_user(prog_ret, &uattr->test.retval))
+ err = -EFAULT;
+out:
+ kfree(args);
+ bpf_jit_free_exec(image);
+ kfree(tprogs);
+ return err;
+}
+
+static int bpf_dummy_init(struct btf *btf)
+{
+ return 0;
+}
+
+static bool bpf_dummy_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
+}
+
+static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id)
+{
+ const struct btf_type *state;
+ s32 type_id;
+ int err;
+
+ type_id = btf_find_by_name_kind(btf, "bpf_dummy_ops_state",
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+
+ state = btf_type_by_id(btf, type_id);
+ if (t != state) {
+ bpf_log(log, "only access to bpf_dummy_ops_state is supported\n");
+ return -EACCES;
+ }
+
+ err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+ if (err < 0)
+ return err;
+
+ return atype == BPF_READ ? err : NOT_INIT;
+}
+
+static const struct bpf_verifier_ops bpf_dummy_verifier_ops = {
+ .is_valid_access = bpf_dummy_ops_is_valid_access,
+ .btf_struct_access = bpf_dummy_ops_btf_struct_access,
+};
+
+static int bpf_dummy_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return -EOPNOTSUPP;
+}
+
+static int bpf_dummy_reg(void *kdata)
+{
+ return -EOPNOTSUPP;
+}
+
+static void bpf_dummy_unreg(void *kdata)
+{
+}
+
+struct bpf_struct_ops bpf_bpf_dummy_ops = {
+ .verifier_ops = &bpf_dummy_verifier_ops,
+ .init = bpf_dummy_init,
+ .init_member = bpf_dummy_init_member,
+ .reg = bpf_dummy_reg,
+ .unreg = bpf_dummy_unreg,
+ .name = "bpf_dummy_ops",
+};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2eb0e55ef54d..46dd95755967 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2017 Facebook
*/
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -241,9 +242,11 @@ BTF_ID(func, bpf_kfunc_call_test2)
BTF_ID(func, bpf_kfunc_call_test3)
BTF_SET_END(test_sk_kfunc_ids)
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
{
- return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
+ if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
+ return true;
+ return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
}
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
@@ -355,13 +358,9 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
return -EINVAL;
if (ctx_size_in) {
- info.ctx = kzalloc(ctx_size_in, GFP_USER);
- if (!info.ctx)
- return -ENOMEM;
- if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
- err = -EFAULT;
- goto out;
- }
+ info.ctx = memdup_user(ctx_in, ctx_size_in);
+ if (IS_ERR(info.ctx))
+ return PTR_ERR(info.ctx);
} else {
info.ctx = NULL;
}
@@ -389,7 +388,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
err = -EFAULT;
-out:
kfree(info.ctx);
return err;
}
@@ -483,11 +481,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
return -EINVAL;
/* priority is allowed */
-
- if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
- offsetof(struct __sk_buff, ifindex)))
- return -EINVAL;
-
+ /* ingress_ifindex is allowed */
/* ifindex is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
@@ -511,11 +505,18 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
/* gso_size is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
+ offsetof(struct __sk_buff, hwtstamp)))
+ return -EINVAL;
+
+ /* hwtstamp is allowed */
+
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, hwtstamp),
sizeof(struct __sk_buff)))
return -EINVAL;
skb->mark = __skb->mark;
skb->priority = __skb->priority;
+ skb->skb_iif = __skb->ingress_ifindex;
skb->tstamp = __skb->tstamp;
memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
@@ -532,6 +533,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
return -EINVAL;
skb_shinfo(skb)->gso_segs = __skb->gso_segs;
skb_shinfo(skb)->gso_size = __skb->gso_size;
+ skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp;
return 0;
}
@@ -545,13 +547,21 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
__skb->mark = skb->mark;
__skb->priority = skb->priority;
+ __skb->ingress_ifindex = skb->skb_iif;
__skb->ifindex = skb->dev->ifindex;
__skb->tstamp = skb->tstamp;
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
__skb->wire_len = cb->pkt_len;
__skb->gso_segs = skb_shinfo(skb)->gso_segs;
+ __skb->hwtstamp = skb_shinfo(skb)->hwtstamps.hwtstamp;
}
+static struct proto bpf_dummy_proto = {
+ .name = "bpf_dummy",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct sock),
+};
+
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
@@ -596,20 +606,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
break;
}
- sk = kzalloc(sizeof(struct sock), GFP_USER);
+ sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
if (!sk) {
kfree(data);
kfree(ctx);
return -ENOMEM;
}
- sock_net_set(sk, net);
sock_init_data(NULL, sk);
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
kfree(ctx);
- kfree(sk);
+ sk_free(sk);
return -ENOMEM;
}
skb->sk = sk;
@@ -682,8 +691,7 @@ out:
if (dev && dev != net->loopback_dev)
dev_put(dev);
kfree_skb(skb);
- bpf_sk_storage_free(sk);
- kfree(sk);
+ sk_free(sk);
kfree(ctx);
return ret;
}
@@ -797,7 +805,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (ret)
goto free_data;
- bpf_prog_change_xdp(NULL, prog);
+ if (repeat > 1)
+ bpf_prog_change_xdp(NULL, prog);
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
/* We convert the xdp_buff back to an xdp_md before checking the return
* code so the reference count of any held netdevice will be decremented
@@ -818,7 +827,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
sizeof(struct xdp_md));
out:
- bpf_prog_change_xdp(prog, NULL);
+ if (repeat > 1)
+ bpf_prog_change_xdp(prog, NULL);
free_data:
kfree(data);
free_ctx:
@@ -1037,13 +1047,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
return -EINVAL;
if (ctx_size_in) {
- ctx = kzalloc(ctx_size_in, GFP_USER);
- if (!ctx)
- return -ENOMEM;
- if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
- err = -EFAULT;
- goto out;
- }
+ ctx = memdup_user(ctx_in, ctx_size_in);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
}
rcu_read_lock_trace();
diff --git a/net/bridge/br.c b/net/bridge/br.c
index d3a32c6813e0..1fac72cc617f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -36,7 +36,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
bool changed_addr;
int err;
- if (dev->priv_flags & IFF_EBRIDGE) {
+ if (netif_is_bridge_master(dev)) {
err = br_vlan_bridge_event(dev, event, ptr);
if (err)
return notifier_from_errno(err);
@@ -349,7 +349,7 @@ static void __net_exit br_net_exit(struct net *net)
rtnl_lock();
for_each_netdev(net, dev)
- if (dev->priv_flags & IFF_EBRIDGE)
+ if (netif_is_bridge_master(dev))
br_dev_delete(dev, &list);
unregister_netdevice_many(&list);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 46812b659710..6ccda68bd473 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -32,10 +32,6 @@ static const struct rhashtable_params br_fdb_rht_params = {
};
static struct kmem_cache *br_fdb_cache __read_mostly;
-static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid);
-static void fdb_notify(struct net_bridge *br,
- const struct net_bridge_fdb_entry *, int, bool);
int __init br_fdb_init(void)
{
@@ -87,6 +83,128 @@ static void fdb_rcu_free(struct rcu_head *head)
kmem_cache_free(br_fdb_cache, ent);
}
+static int fdb_to_nud(const struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb)
+{
+ if (test_bit(BR_FDB_LOCAL, &fdb->flags))
+ return NUD_PERMANENT;
+ else if (test_bit(BR_FDB_STATIC, &fdb->flags))
+ return NUD_NOARP;
+ else if (has_expired(br, fdb))
+ return NUD_STALE;
+ else
+ return NUD_REACHABLE;
+}
+
+static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb,
+ u32 portid, u32 seq, int type, unsigned int flags)
+{
+ const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+ unsigned long now = jiffies;
+ struct nda_cacheinfo ci;
+ struct nlmsghdr *nlh;
+ struct ndmsg *ndm;
+
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ ndm = nlmsg_data(nlh);
+ ndm->ndm_family = AF_BRIDGE;
+ ndm->ndm_pad1 = 0;
+ ndm->ndm_pad2 = 0;
+ ndm->ndm_flags = 0;
+ ndm->ndm_type = 0;
+ ndm->ndm_ifindex = dst ? dst->dev->ifindex : br->dev->ifindex;
+ ndm->ndm_state = fdb_to_nud(br, fdb);
+
+ if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
+ ndm->ndm_flags |= NTF_OFFLOADED;
+ if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
+ ndm->ndm_flags |= NTF_EXT_LEARNED;
+ if (test_bit(BR_FDB_STICKY, &fdb->flags))
+ ndm->ndm_flags |= NTF_STICKY;
+
+ if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
+ goto nla_put_failure;
+ ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
+ ci.ndm_confirmed = 0;
+ ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
+ ci.ndm_refcnt = 0;
+ if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
+ goto nla_put_failure;
+
+ if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
+ &fdb->key.vlan_id))
+ goto nla_put_failure;
+
+ if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+ struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
+ u8 notify_bits = FDB_NOTIFY_BIT;
+
+ if (!nest)
+ goto nla_put_failure;
+ if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
+ notify_bits |= FDB_NOTIFY_INACTIVE_BIT;
+
+ if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) {
+ nla_nest_cancel(skb, nest);
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, nest);
+ }
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static inline size_t fdb_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ndmsg))
+ + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+ + nla_total_size(sizeof(u32)) /* NDA_MASTER */
+ + nla_total_size(sizeof(u16)) /* NDA_VLAN */
+ + nla_total_size(sizeof(struct nda_cacheinfo))
+ + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
+ + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
+}
+
+static void fdb_notify(struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb, int type,
+ bool swdev_notify)
+{
+ struct net *net = dev_net(br->dev);
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ if (swdev_notify)
+ br_switchdev_fdb_notify(br, fdb, type);
+
+ skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ return;
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+}
+
static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
const unsigned char *addr,
__u16 vid)
@@ -257,6 +375,66 @@ void br_fdb_find_delete_local(struct net_bridge *br,
spin_unlock_bh(&br->hash_lock);
}
+static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
+ struct net_bridge_port *source,
+ const unsigned char *addr,
+ __u16 vid,
+ unsigned long flags)
+{
+ struct net_bridge_fdb_entry *fdb;
+ int err;
+
+ fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
+ if (!fdb)
+ return NULL;
+
+ memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
+ WRITE_ONCE(fdb->dst, source);
+ fdb->key.vlan_id = vid;
+ fdb->flags = flags;
+ fdb->updated = fdb->used = jiffies;
+ err = rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, &fdb->rhnode,
+ br_fdb_rht_params);
+ if (err) {
+ kmem_cache_free(br_fdb_cache, fdb);
+ return NULL;
+ }
+
+ hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
+
+ return fdb;
+}
+
+static int fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
+ const unsigned char *addr, u16 vid)
+{
+ struct net_bridge_fdb_entry *fdb;
+
+ if (!is_valid_ether_addr(addr))
+ return -EINVAL;
+
+ fdb = br_fdb_find(br, addr, vid);
+ if (fdb) {
+ /* it is okay to have multiple ports with same
+ * address, just use the first one.
+ */
+ if (test_bit(BR_FDB_LOCAL, &fdb->flags))
+ return 0;
+ br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
+ source ? source->dev->name : br->dev->name, addr, vid);
+ fdb_delete(br, fdb, true);
+ }
+
+ fdb = fdb_create(br, source, addr, vid,
+ BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
+ if (!fdb)
+ return -ENOMEM;
+
+ fdb_add_hw_addr(br, addr);
+ fdb_notify(br, fdb, RTM_NEWNEIGH, true);
+ return 0;
+}
+
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{
struct net_bridge_vlan_group *vg;
@@ -283,7 +461,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
insert:
/* insert new address, may fail if invalid address or dup. */
- fdb_insert(br, p, newaddr, 0);
+ fdb_add_local(br, p, newaddr, 0);
if (!vg || !vg->num_vlans)
goto done;
@@ -293,7 +471,7 @@ insert:
* from under us.
*/
list_for_each_entry(v, &vg->vlan_list, vlist)
- fdb_insert(br, p, newaddr, v->vid);
+ fdb_add_local(br, p, newaddr, v->vid);
done:
spin_unlock_bh(&br->hash_lock);
@@ -313,7 +491,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
!f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
fdb_delete_local(br, NULL, f);
- fdb_insert(br, NULL, newaddr, 0);
+ fdb_add_local(br, NULL, newaddr, 0);
vg = br_vlan_group(br);
if (!vg || !vg->num_vlans)
goto out;
@@ -328,7 +506,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
!f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
fdb_delete_local(br, NULL, f);
- fdb_insert(br, NULL, newaddr, v->vid);
+ fdb_add_local(br, NULL, newaddr, v->vid);
}
out:
spin_unlock_bh(&br->hash_lock);
@@ -503,71 +681,14 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
return num;
}
-static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
- struct net_bridge_port *source,
- const unsigned char *addr,
- __u16 vid,
- unsigned long flags)
-{
- struct net_bridge_fdb_entry *fdb;
-
- fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
- if (fdb) {
- memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
- WRITE_ONCE(fdb->dst, source);
- fdb->key.vlan_id = vid;
- fdb->flags = flags;
- fdb->updated = fdb->used = jiffies;
- if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
- &fdb->rhnode,
- br_fdb_rht_params)) {
- kmem_cache_free(br_fdb_cache, fdb);
- fdb = NULL;
- } else {
- hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
- }
- }
- return fdb;
-}
-
-static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid)
-{
- struct net_bridge_fdb_entry *fdb;
-
- if (!is_valid_ether_addr(addr))
- return -EINVAL;
-
- fdb = br_fdb_find(br, addr, vid);
- if (fdb) {
- /* it is okay to have multiple ports with same
- * address, just use the first one.
- */
- if (test_bit(BR_FDB_LOCAL, &fdb->flags))
- return 0;
- br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
- source ? source->dev->name : br->dev->name, addr, vid);
- fdb_delete(br, fdb, true);
- }
-
- fdb = fdb_create(br, source, addr, vid,
- BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
- if (!fdb)
- return -ENOMEM;
-
- fdb_add_hw_addr(br, addr);
- fdb_notify(br, fdb, RTM_NEWNEIGH, true);
- return 0;
-}
-
/* Add entry for local address of interface */
-int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid)
+int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
+ const unsigned char *addr, u16 vid)
{
int ret;
spin_lock_bh(&br->hash_lock);
- ret = fdb_insert(br, source, addr, vid);
+ ret = fdb_add_local(br, source, addr, vid);
spin_unlock_bh(&br->hash_lock);
return ret;
}
@@ -638,182 +759,6 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
}
}
-static int fdb_to_nud(const struct net_bridge *br,
- const struct net_bridge_fdb_entry *fdb)
-{
- if (test_bit(BR_FDB_LOCAL, &fdb->flags))
- return NUD_PERMANENT;
- else if (test_bit(BR_FDB_STATIC, &fdb->flags))
- return NUD_NOARP;
- else if (has_expired(br, fdb))
- return NUD_STALE;
- else
- return NUD_REACHABLE;
-}
-
-static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
- const struct net_bridge_fdb_entry *fdb,
- u32 portid, u32 seq, int type, unsigned int flags)
-{
- const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
- unsigned long now = jiffies;
- struct nda_cacheinfo ci;
- struct nlmsghdr *nlh;
- struct ndmsg *ndm;
-
- nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
- if (nlh == NULL)
- return -EMSGSIZE;
-
- ndm = nlmsg_data(nlh);
- ndm->ndm_family = AF_BRIDGE;
- ndm->ndm_pad1 = 0;
- ndm->ndm_pad2 = 0;
- ndm->ndm_flags = 0;
- ndm->ndm_type = 0;
- ndm->ndm_ifindex = dst ? dst->dev->ifindex : br->dev->ifindex;
- ndm->ndm_state = fdb_to_nud(br, fdb);
-
- if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
- ndm->ndm_flags |= NTF_OFFLOADED;
- if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
- ndm->ndm_flags |= NTF_EXT_LEARNED;
- if (test_bit(BR_FDB_STICKY, &fdb->flags))
- ndm->ndm_flags |= NTF_STICKY;
-
- if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
- goto nla_put_failure;
- if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
- goto nla_put_failure;
- ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
- ci.ndm_confirmed = 0;
- ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
- ci.ndm_refcnt = 0;
- if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
- goto nla_put_failure;
-
- if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
- &fdb->key.vlan_id))
- goto nla_put_failure;
-
- if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
- struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
- u8 notify_bits = FDB_NOTIFY_BIT;
-
- if (!nest)
- goto nla_put_failure;
- if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
- notify_bits |= FDB_NOTIFY_INACTIVE_BIT;
-
- if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) {
- nla_nest_cancel(skb, nest);
- goto nla_put_failure;
- }
-
- nla_nest_end(skb, nest);
- }
-
- nlmsg_end(skb, nlh);
- return 0;
-
-nla_put_failure:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-static inline size_t fdb_nlmsg_size(void)
-{
- return NLMSG_ALIGN(sizeof(struct ndmsg))
- + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
- + nla_total_size(sizeof(u32)) /* NDA_MASTER */
- + nla_total_size(sizeof(u16)) /* NDA_VLAN */
- + nla_total_size(sizeof(struct nda_cacheinfo))
- + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
- + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
-}
-
-static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
- const struct net_bridge_fdb_entry *fdb,
- unsigned long action, const void *ctx)
-{
- const struct net_bridge_port *p = READ_ONCE(fdb->dst);
- struct switchdev_notifier_fdb_info item;
- int err;
-
- item.addr = fdb->key.addr.addr;
- item.vid = fdb->key.vlan_id;
- item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
- item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
- item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
- item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
- item.info.ctx = ctx;
-
- err = nb->notifier_call(nb, action, &item);
- return notifier_to_errno(err);
-}
-
-int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
- struct notifier_block *nb)
-{
- struct net_bridge_fdb_entry *fdb;
- struct net_bridge *br;
- unsigned long action;
- int err = 0;
-
- if (!nb)
- return 0;
-
- if (!netif_is_bridge_master(br_dev))
- return -EINVAL;
-
- br = netdev_priv(br_dev);
-
- if (adding)
- action = SWITCHDEV_FDB_ADD_TO_DEVICE;
- else
- action = SWITCHDEV_FDB_DEL_TO_DEVICE;
-
- rcu_read_lock();
-
- hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
- err = br_fdb_replay_one(br, nb, fdb, action, ctx);
- if (err)
- break;
- }
-
- rcu_read_unlock();
-
- return err;
-}
-
-static void fdb_notify(struct net_bridge *br,
- const struct net_bridge_fdb_entry *fdb, int type,
- bool swdev_notify)
-{
- struct net *net = dev_net(br->dev);
- struct sk_buff *skb;
- int err = -ENOBUFS;
-
- if (swdev_notify)
- br_switchdev_fdb_notify(br, fdb, type);
-
- skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
- if (skb == NULL)
- goto errout;
-
- err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
- if (err < 0) {
- /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
- kfree_skb(skb);
- goto errout;
- }
- rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
- return;
-errout:
- rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
-}
-
/* Dump information about entries, in response to GETNEIGH */
int br_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
@@ -825,7 +770,7 @@ int br_fdb_dump(struct sk_buff *skb,
struct net_bridge_fdb_entry *f;
int err = 0;
- if (!(dev->priv_flags & IFF_EBRIDGE))
+ if (!netif_is_bridge_master(dev))
return err;
if (!filter_dev) {
@@ -1076,7 +1021,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
return -EINVAL;
}
- if (dev->priv_flags & IFF_EBRIDGE) {
+ if (netif_is_bridge_master(dev)) {
br = netdev_priv(dev);
vg = br_vlan_group(br);
} else {
@@ -1173,7 +1118,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_bridge *br;
int err;
- if (dev->priv_flags & IFF_EBRIDGE) {
+ if (netif_is_bridge_master(dev)) {
br = netdev_priv(dev);
vg = br_vlan_group(br);
} else {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 4a02f8bb278a..c1183fef1f21 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -471,7 +471,7 @@ int br_del_bridge(struct net *net, const char *name)
if (dev == NULL)
ret = -ENXIO; /* Could not find device */
- else if (!(dev->priv_flags & IFF_EBRIDGE)) {
+ else if (!netif_is_bridge_master(dev)) {
/* Attempt to delete non bridge device! */
ret = -EPERM;
}
@@ -670,7 +670,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
else
netdev_set_rx_headroom(dev, br_hr);
- if (br_fdb_insert(br, p, dev->dev_addr, 0))
+ if (br_fdb_add_local(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");
if (br->dev->addr_assign_type != NET_ADDR_SET) {
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 793b0db9d9a3..db4ab2c2ce18 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -26,7 +26,7 @@ static int get_bridge_ifindices(struct net *net, int *indices, int num)
for_each_netdev_rcu(net, dev) {
if (i >= num)
break;
- if (dev->priv_flags & IFF_EBRIDGE)
+ if (netif_is_bridge_master(dev))
indices[i++] = dev->ifindex;
}
rcu_read_unlock();
@@ -71,7 +71,8 @@ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf,
num = br_fdb_fillbuf(br, buf, maxnum, offset);
if (num > 0) {
- if (copy_to_user(userbuf, buf, num*sizeof(struct __fdb_entry)))
+ if (copy_to_user(userbuf, buf,
+ array_size(num, sizeof(struct __fdb_entry))))
num = -EFAULT;
}
kfree(buf);
@@ -188,7 +189,7 @@ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user
return -ENOMEM;
get_port_ifindices(br, indices, num);
- if (copy_to_user(argp, indices, num * sizeof(int)))
+ if (copy_to_user(argp, indices, array_size(num, sizeof(int))))
num = -EFAULT;
kfree(indices);
return num;
@@ -336,7 +337,8 @@ static int old_deviceless(struct net *net, void __user *uarg)
args[2] = get_bridge_ifindices(net, indices, args[2]);
- ret = copy_to_user(uarg, indices, args[2]*sizeof(int))
+ ret = copy_to_user(uarg, indices,
+ array_size(args[2], sizeof(int)))
? -EFAULT : args[2];
kfree(indices);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 0281453f7766..4556d913955b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -422,7 +422,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->seq = net->dev_base_seq;
for_each_netdev_rcu(net, dev) {
- if (dev->priv_flags & IFF_EBRIDGE) {
+ if (netif_is_bridge_master(dev)) {
struct net_bridge *br = netdev_priv(dev);
struct br_port_msg *bpm;
@@ -552,252 +552,16 @@ out:
return nlmsg_size;
}
-struct br_mdb_complete_info {
- struct net_bridge_port *port;
- struct br_ip ip;
-};
-
-static void br_mdb_complete(struct net_device *dev, int err, void *priv)
-{
- struct br_mdb_complete_info *data = priv;
- struct net_bridge_port_group __rcu **pp;
- struct net_bridge_port_group *p;
- struct net_bridge_mdb_entry *mp;
- struct net_bridge_port *port = data->port;
- struct net_bridge *br = port->br;
-
- if (err)
- goto err;
-
- spin_lock_bh(&br->multicast_lock);
- mp = br_mdb_ip_get(br, &data->ip);
- if (!mp)
- goto out;
- for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
- pp = &p->next) {
- if (p->key.port != port)
- continue;
- p->flags |= MDB_PG_FLAGS_OFFLOAD;
- }
-out:
- spin_unlock_bh(&br->multicast_lock);
-err:
- kfree(priv);
-}
-
-static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
- const struct net_bridge_mdb_entry *mp)
-{
- if (mp->addr.proto == htons(ETH_P_IP))
- ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
-#if IS_ENABLED(CONFIG_IPV6)
- else if (mp->addr.proto == htons(ETH_P_IPV6))
- ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
-#endif
- else
- ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
-
- mdb->vid = mp->addr.vid;
-}
-
-static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
- const struct switchdev_obj_port_mdb *mdb,
- unsigned long action, const void *ctx,
- struct netlink_ext_ack *extack)
-{
- struct switchdev_notifier_port_obj_info obj_info = {
- .info = {
- .dev = dev,
- .extack = extack,
- .ctx = ctx,
- },
- .obj = &mdb->obj,
- };
- int err;
-
- err = nb->notifier_call(nb, action, &obj_info);
- return notifier_to_errno(err);
-}
-
-static int br_mdb_queue_one(struct list_head *mdb_list,
- enum switchdev_obj_id id,
- const struct net_bridge_mdb_entry *mp,
- struct net_device *orig_dev)
-{
- struct switchdev_obj_port_mdb *mdb;
-
- mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
-
- mdb->obj.id = id;
- mdb->obj.orig_dev = orig_dev;
- br_switchdev_mdb_populate(mdb, mp);
- list_add_tail(&mdb->obj.list, mdb_list);
-
- return 0;
-}
-
-int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
- const void *ctx, bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack)
-{
- const struct net_bridge_mdb_entry *mp;
- struct switchdev_obj *obj, *tmp;
- struct net_bridge *br;
- unsigned long action;
- LIST_HEAD(mdb_list);
- int err = 0;
-
- ASSERT_RTNL();
-
- if (!nb)
- return 0;
-
- if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
- return -EINVAL;
-
- br = netdev_priv(br_dev);
-
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
- return 0;
-
- /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
- * because the write-side protection is br->multicast_lock. But we
- * need to emulate the [ blocking ] calling context of a regular
- * switchdev event, so since both br->multicast_lock and RCU read side
- * critical sections are atomic, we have no choice but to pick the RCU
- * read side lock, queue up all our events, leave the critical section
- * and notify switchdev from blocking context.
- */
- rcu_read_lock();
-
- hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
- struct net_bridge_port_group __rcu * const *pp;
- const struct net_bridge_port_group *p;
-
- if (mp->host_joined) {
- err = br_mdb_queue_one(&mdb_list,
- SWITCHDEV_OBJ_ID_HOST_MDB,
- mp, br_dev);
- if (err) {
- rcu_read_unlock();
- goto out_free_mdb;
- }
- }
-
- for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
- pp = &p->next) {
- if (p->key.port->dev != dev)
- continue;
-
- err = br_mdb_queue_one(&mdb_list,
- SWITCHDEV_OBJ_ID_PORT_MDB,
- mp, dev);
- if (err) {
- rcu_read_unlock();
- goto out_free_mdb;
- }
- }
- }
-
- rcu_read_unlock();
-
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
-
- list_for_each_entry(obj, &mdb_list, list) {
- err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
- action, ctx, extack);
- if (err)
- goto out_free_mdb;
- }
-
-out_free_mdb:
- list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
- list_del(&obj->list);
- kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
- }
-
- return err;
-}
-
-static void br_mdb_switchdev_host_port(struct net_device *dev,
- struct net_device *lower_dev,
- struct net_bridge_mdb_entry *mp,
- int type)
-{
- struct switchdev_obj_port_mdb mdb = {
- .obj = {
- .id = SWITCHDEV_OBJ_ID_HOST_MDB,
- .flags = SWITCHDEV_F_DEFER,
- .orig_dev = dev,
- },
- };
-
- br_switchdev_mdb_populate(&mdb, mp);
-
- switch (type) {
- case RTM_NEWMDB:
- switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
- break;
- case RTM_DELMDB:
- switchdev_port_obj_del(lower_dev, &mdb.obj);
- break;
- }
-}
-
-static void br_mdb_switchdev_host(struct net_device *dev,
- struct net_bridge_mdb_entry *mp, int type)
-{
- struct net_device *lower_dev;
- struct list_head *iter;
-
- netdev_for_each_lower_dev(dev, lower_dev, iter)
- br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
-}
-
void br_mdb_notify(struct net_device *dev,
struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg,
int type)
{
- struct br_mdb_complete_info *complete_info;
- struct switchdev_obj_port_mdb mdb = {
- .obj = {
- .id = SWITCHDEV_OBJ_ID_PORT_MDB,
- .flags = SWITCHDEV_F_DEFER,
- },
- };
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
- if (pg) {
- br_switchdev_mdb_populate(&mdb, mp);
-
- mdb.obj.orig_dev = pg->key.port->dev;
- switch (type) {
- case RTM_NEWMDB:
- complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
- if (!complete_info)
- break;
- complete_info->port = pg->key.port;
- complete_info->ip = mp->addr;
- mdb.obj.complete_priv = complete_info;
- mdb.obj.complete = br_mdb_complete;
- if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
- kfree(complete_info);
- break;
- case RTM_DELMDB:
- switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
- break;
- }
- } else {
- br_mdb_switchdev_host(dev, mp, type);
- }
+ br_switchdev_mdb_notify(dev, mp, pg, type);
skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC);
if (!skb)
@@ -1016,7 +780,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENODEV;
}
- if (!(dev->priv_flags & IFF_EBRIDGE)) {
+ if (!netif_is_bridge_master(dev)) {
NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
return -EOPNOTSUPP;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 3523c8c7068f..f3d751105343 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1677,8 +1677,6 @@ static void br_multicast_update_querier(struct net_bridge_mcast *brmctx,
int ifindex,
struct br_ip *saddr)
{
- lockdep_assert_held_once(&brmctx->br->multicast_lock);
-
write_seqcount_begin(&querier->seq);
querier->port_ifidx = ifindex;
memcpy(&querier->addr, saddr, sizeof(*saddr));
@@ -3867,13 +3865,13 @@ void br_multicast_ctx_init(struct net_bridge *br,
brmctx->ip4_other_query.delay_time = 0;
brmctx->ip4_querier.port_ifidx = 0;
- seqcount_init(&brmctx->ip4_querier.seq);
+ seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
brmctx->multicast_mld_version = 1;
brmctx->ip6_other_query.delay_time = 0;
brmctx->ip6_querier.port_ifidx = 0;
- seqcount_init(&brmctx->ip6_querier.seq);
+ seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
#endif
timer_setup(&brmctx->ip4_mc_router_timer,
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 8edfb98ae1d5..b5af68c105a8 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -968,7 +968,7 @@ static int brnf_device_event(struct notifier_block *unused, unsigned long event,
struct net *net;
int ret;
- if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE))
+ if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev))
return NOTIFY_DONE;
ASSERT_RTNL();
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6c58fc14d2cb..0c8b5f1a15bc 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -106,7 +106,7 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev,
p = br_port_get_check_rcu(dev);
if (p)
vg = nbp_vlan_group_rcu(p);
- } else if (dev->priv_flags & IFF_EBRIDGE) {
+ } else if (netif_is_bridge_master(dev)) {
br = netdev_priv(dev);
vg = br_vlan_group_rcu(br);
}
@@ -1050,7 +1050,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
p = br_port_get_rtnl(dev);
/* We want to accept dev as bridge itself as well */
- if (!p && !(dev->priv_flags & IFF_EBRIDGE))
+ if (!p && !netif_is_bridge_master(dev))
return -EINVAL;
err = br_afspec(br, p, afspec, RTM_DELLINK, &changed, NULL);
@@ -1666,7 +1666,8 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
}
return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
- nla_total_size(sizeof(struct br_mcast_stats)) +
+ nla_total_size_64bit(sizeof(struct br_mcast_stats)) +
+ (p ? nla_total_size_64bit(sizeof(p->stp_xstats)) : 0) +
nla_total_size(0);
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b4cef3a97f12..c0efd697865a 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -82,7 +82,7 @@ struct bridge_mcast_other_query {
struct bridge_mcast_querier {
struct br_ip addr;
int port_ifidx;
- seqcount_t seq;
+ seqcount_spinlock_t seq;
};
/* IGMP/MLD statistics */
@@ -767,8 +767,8 @@ struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
unsigned long off);
-int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid);
+int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
+ const unsigned char *addr, u16 vid);
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, unsigned long flags);
@@ -792,8 +792,6 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
bool swdev_notify);
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid, bool offloaded);
-int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
- struct notifier_block *nb);
/* br_forward.c */
enum br_pkt_type {
@@ -958,9 +956,6 @@ int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
struct netlink_ext_ack *extack);
bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on);
-int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
- const void *ctx, bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack);
int br_rports_fill_info(struct sk_buff *skb,
const struct net_bridge_mcast *brmctx);
int br_multicast_dump_querier_state(struct sk_buff *skb,
@@ -1125,9 +1120,7 @@ static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brm
static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx)
{
- /* use the RFC default of 2 for QRV */
- return 2 * brmctx->multicast_query_interval +
- brmctx->multicast_query_response_interval;
+ return brmctx->multicast_membership_interval;
}
static inline bool
@@ -1398,14 +1391,6 @@ static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan,
return false;
}
-static inline int br_mdb_replay(struct net_device *br_dev,
- struct net_device *dev, const void *ctx,
- bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack)
-{
- return -EOPNOTSUPP;
-}
-
static inline bool
br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
const struct net_bridge_mcast *brmctx2)
@@ -1463,9 +1448,6 @@ void br_vlan_notify(const struct net_bridge *br,
const struct net_bridge_port *p,
u16 vid, u16 vid_range,
int cmd);
-int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
- const void *ctx, bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack);
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end);
@@ -1712,13 +1694,11 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
return true;
}
-static inline int br_vlan_replay(struct net_device *br_dev,
- struct net_device *dev, const void *ctx,
- bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack)
+static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
{
- return -EOPNOTSUPP;
+ return 0;
}
+
#endif
/* br_vlan_options.c */
@@ -1913,11 +1893,13 @@ static inline int br_cfm_status_fill_info(struct sk_buff *skb,
static inline int br_cfm_mep_count(struct net_bridge *br, u32 *count)
{
+ *count = 0;
return -EOPNOTSUPP;
}
static inline int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count)
{
+ *count = 0;
return -EOPNOTSUPP;
}
#endif
@@ -1991,6 +1973,10 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
struct netlink_ext_ack *extack);
void br_switchdev_fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb, int type);
+void br_switchdev_mdb_notify(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type);
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
@@ -2077,6 +2063,13 @@ br_switchdev_fdb_notify(struct net_bridge *br,
{
}
+static inline void br_switchdev_mdb_notify(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type)
+{
+}
+
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index ba55851fe132..75204d36d7f9 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -233,7 +233,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN);
memcpy(br->bridge_id.addr, addr, ETH_ALEN);
- memcpy(br->dev->dev_addr, addr, ETH_ALEN);
+ eth_hw_addr_set(br->dev, addr);
list_for_each_entry(p, &br->port_list, list) {
if (ether_addr_equal(p->designated_bridge.addr, oldaddr))
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 6bf518d78f02..f8fbaaa7c501 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -4,6 +4,7 @@
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <net/ip.h>
#include <net/switchdev.h>
#include "br_private.h"
@@ -122,28 +123,38 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
return 0;
}
+static void br_switchdev_fdb_populate(struct net_bridge *br,
+ struct switchdev_notifier_fdb_info *item,
+ const struct net_bridge_fdb_entry *fdb,
+ const void *ctx)
+{
+ const struct net_bridge_port *p = READ_ONCE(fdb->dst);
+
+ item->addr = fdb->key.addr.addr;
+ item->vid = fdb->key.vlan_id;
+ item->added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+ item->offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
+ item->is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
+ item->info.dev = (!p || item->is_local) ? br->dev : p->dev;
+ item->info.ctx = ctx;
+}
+
void
br_switchdev_fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb, int type)
{
- const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
- struct switchdev_notifier_fdb_info info = {
- .addr = fdb->key.addr.addr,
- .vid = fdb->key.vlan_id,
- .added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags),
- .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
- .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
- };
- struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
+ struct switchdev_notifier_fdb_info item;
+
+ br_switchdev_fdb_populate(br, &item, fdb, NULL);
switch (type) {
case RTM_DELNEIGH:
call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
- dev, &info.info, NULL);
+ item.info.dev, &item.info, NULL);
break;
case RTM_NEWNEIGH:
call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
- dev, &info.info, NULL);
+ item.info.dev, &item.info, NULL);
break;
}
}
@@ -270,6 +281,397 @@ static void nbp_switchdev_del(struct net_bridge_port *p)
}
}
+static int
+br_switchdev_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
+ const struct net_bridge_fdb_entry *fdb,
+ unsigned long action, const void *ctx)
+{
+ struct switchdev_notifier_fdb_info item;
+ int err;
+
+ br_switchdev_fdb_populate(br, &item, fdb, ctx);
+
+ err = nb->notifier_call(nb, action, &item);
+ return notifier_to_errno(err);
+}
+
+static int
+br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx,
+ bool adding, struct notifier_block *nb)
+{
+ struct net_bridge_fdb_entry *fdb;
+ struct net_bridge *br;
+ unsigned long action;
+ int err = 0;
+
+ if (!nb)
+ return 0;
+
+ if (!netif_is_bridge_master(br_dev))
+ return -EINVAL;
+
+ br = netdev_priv(br_dev);
+
+ if (adding)
+ action = SWITCHDEV_FDB_ADD_TO_DEVICE;
+ else
+ action = SWITCHDEV_FDB_DEL_TO_DEVICE;
+
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
+ err = br_switchdev_fdb_replay_one(br, nb, fdb, action, ctx);
+ if (err)
+ break;
+ }
+
+ rcu_read_unlock();
+
+ return err;
+}
+
+static int
+br_switchdev_vlan_replay_one(struct notifier_block *nb,
+ struct net_device *dev,
+ struct switchdev_obj_port_vlan *vlan,
+ const void *ctx, unsigned long action,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_port_obj_info obj_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ .ctx = ctx,
+ },
+ .obj = &vlan->obj,
+ };
+ int err;
+
+ err = nb->notifier_call(nb, action, &obj_info);
+ return notifier_to_errno(err);
+}
+
+static int br_switchdev_vlan_replay(struct net_device *br_dev,
+ struct net_device *dev,
+ const void *ctx, bool adding,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ struct net_bridge_port *p;
+ struct net_bridge *br;
+ unsigned long action;
+ int err = 0;
+ u16 pvid;
+
+ ASSERT_RTNL();
+
+ if (!nb)
+ return 0;
+
+ if (!netif_is_bridge_master(br_dev))
+ return -EINVAL;
+
+ if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
+ return -EINVAL;
+
+ if (netif_is_bridge_master(dev)) {
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
+ p = NULL;
+ } else {
+ p = br_port_get_rtnl(dev);
+ if (WARN_ON(!p))
+ return -EINVAL;
+ vg = nbp_vlan_group(p);
+ br = p->br;
+ }
+
+ if (!vg)
+ return 0;
+
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ pvid = br_get_pvid(vg);
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ struct switchdev_obj_port_vlan vlan = {
+ .obj.orig_dev = dev,
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .flags = br_vlan_flags(v, pvid),
+ .vid = v->vid,
+ };
+
+ if (!br_vlan_should_use(v))
+ continue;
+
+ err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx,
+ action, extack);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+struct br_switchdev_mdb_complete_info {
+ struct net_bridge_port *port;
+ struct br_ip ip;
+};
+
+static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *priv)
+{
+ struct br_switchdev_mdb_complete_info *data = priv;
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+ struct net_bridge_mdb_entry *mp;
+ struct net_bridge_port *port = data->port;
+ struct net_bridge *br = port->br;
+
+ if (err)
+ goto err;
+
+ spin_lock_bh(&br->multicast_lock);
+ mp = br_mdb_ip_get(br, &data->ip);
+ if (!mp)
+ goto out;
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
+ pp = &p->next) {
+ if (p->key.port != port)
+ continue;
+ p->flags |= MDB_PG_FLAGS_OFFLOAD;
+ }
+out:
+ spin_unlock_bh(&br->multicast_lock);
+err:
+ kfree(priv);
+}
+
+static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
+ const struct net_bridge_mdb_entry *mp)
+{
+ if (mp->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (mp->addr.proto == htons(ETH_P_IPV6))
+ ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
+#endif
+ else
+ ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
+
+ mdb->vid = mp->addr.vid;
+}
+
+static void br_switchdev_host_mdb_one(struct net_device *dev,
+ struct net_device *lower_dev,
+ struct net_bridge_mdb_entry *mp,
+ int type)
+{
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_ID_HOST_MDB,
+ .flags = SWITCHDEV_F_DEFER,
+ .orig_dev = dev,
+ },
+ };
+
+ br_switchdev_mdb_populate(&mdb, mp);
+
+ switch (type) {
+ case RTM_NEWMDB:
+ switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
+ break;
+ case RTM_DELMDB:
+ switchdev_port_obj_del(lower_dev, &mdb.obj);
+ break;
+ }
+}
+
+static void br_switchdev_host_mdb(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp, int type)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter)
+ br_switchdev_host_mdb_one(dev, lower_dev, mp, type);
+}
+
+static int
+br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
+ const struct switchdev_obj_port_mdb *mdb,
+ unsigned long action, const void *ctx,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_port_obj_info obj_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ .ctx = ctx,
+ },
+ .obj = &mdb->obj,
+ };
+ int err;
+
+ err = nb->notifier_call(nb, action, &obj_info);
+ return notifier_to_errno(err);
+}
+
+static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+ enum switchdev_obj_id id,
+ const struct net_bridge_mdb_entry *mp,
+ struct net_device *orig_dev)
+{
+ struct switchdev_obj_port_mdb *mdb;
+
+ mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
+ if (!mdb)
+ return -ENOMEM;
+
+ mdb->obj.id = id;
+ mdb->obj.orig_dev = orig_dev;
+ br_switchdev_mdb_populate(mdb, mp);
+ list_add_tail(&mdb->obj.list, mdb_list);
+
+ return 0;
+}
+
+void br_switchdev_mdb_notify(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type)
+{
+ struct br_switchdev_mdb_complete_info *complete_info;
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_ID_PORT_MDB,
+ .flags = SWITCHDEV_F_DEFER,
+ },
+ };
+
+ if (!pg)
+ return br_switchdev_host_mdb(dev, mp, type);
+
+ br_switchdev_mdb_populate(&mdb, mp);
+
+ mdb.obj.orig_dev = pg->key.port->dev;
+ switch (type) {
+ case RTM_NEWMDB:
+ complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
+ if (!complete_info)
+ break;
+ complete_info->port = pg->key.port;
+ complete_info->ip = mp->addr;
+ mdb.obj.complete_priv = complete_info;
+ mdb.obj.complete = br_switchdev_mdb_complete;
+ if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
+ kfree(complete_info);
+ break;
+ case RTM_DELMDB:
+ switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
+ break;
+ }
+}
+#endif
+
+static int
+br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ const struct net_bridge_mdb_entry *mp;
+ struct switchdev_obj *obj, *tmp;
+ struct net_bridge *br;
+ unsigned long action;
+ LIST_HEAD(mdb_list);
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ if (!nb)
+ return 0;
+
+ if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+ return -EINVAL;
+
+ br = netdev_priv(br_dev);
+
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ return 0;
+
+ /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
+ * because the write-side protection is br->multicast_lock. But we
+ * need to emulate the [ blocking ] calling context of a regular
+ * switchdev event, so since both br->multicast_lock and RCU read side
+ * critical sections are atomic, we have no choice but to pick the RCU
+ * read side lock, queue up all our events, leave the critical section
+ * and notify switchdev from blocking context.
+ */
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+ struct net_bridge_port_group __rcu * const *pp;
+ const struct net_bridge_port_group *p;
+
+ if (mp->host_joined) {
+ err = br_switchdev_mdb_queue_one(&mdb_list,
+ SWITCHDEV_OBJ_ID_HOST_MDB,
+ mp, br_dev);
+ if (err) {
+ rcu_read_unlock();
+ goto out_free_mdb;
+ }
+ }
+
+ for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ pp = &p->next) {
+ if (p->key.port->dev != dev)
+ continue;
+
+ err = br_switchdev_mdb_queue_one(&mdb_list,
+ SWITCHDEV_OBJ_ID_PORT_MDB,
+ mp, dev);
+ if (err) {
+ rcu_read_unlock();
+ goto out_free_mdb;
+ }
+ }
+ }
+
+ rcu_read_unlock();
+
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ list_for_each_entry(obj, &mdb_list, list) {
+ err = br_switchdev_mdb_replay_one(nb, dev,
+ SWITCHDEV_OBJ_PORT_MDB(obj),
+ action, ctx, extack);
+ if (err)
+ goto out_free_mdb;
+ }
+
+out_free_mdb:
+ list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
+ list_del(&obj->list);
+ kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
+ }
+
+ if (err)
+ return err;
+#endif
+
+ return 0;
+}
+
static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
struct notifier_block *atomic_nb,
struct notifier_block *blocking_nb,
@@ -279,15 +681,17 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
struct net_device *dev = p->dev;
int err;
- err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ err = br_switchdev_vlan_replay(br_dev, dev, ctx, true, blocking_nb,
+ extack);
if (err && err != -EOPNOTSUPP)
return err;
- err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
+ extack);
if (err && err != -EOPNOTSUPP)
return err;
- err = br_fdb_replay(br_dev, ctx, true, atomic_nb);
+ err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb);
if (err && err != -EOPNOTSUPP)
return err;
@@ -302,11 +706,11 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
struct net_device *br_dev = p->br->dev;
struct net_device *dev = p->dev;
- br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+ br_switchdev_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
- br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+ br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
- br_fdb_replay(br_dev, ctx, false, atomic_nb);
+ br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
}
/* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 19f65ab91a02..49e105e0a447 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -293,7 +293,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
/* Add the dev mac and count the vlan only if it's usable */
if (br_vlan_should_use(v)) {
- err = br_fdb_insert(br, p, dev->dev_addr, v->vid);
+ err = br_fdb_add_local(br, p, dev->dev_addr, v->vid);
if (err) {
br_err(br, "failed insert local address into bridge forwarding table\n");
goto out_filt;
@@ -683,8 +683,7 @@ static int br_vlan_add_existing(struct net_bridge *br,
goto err_flags;
}
/* It was only kept for port vlans, now make it real */
- err = br_fdb_insert(br, NULL, br->dev->dev_addr,
- vlan->vid);
+ err = br_fdb_add_local(br, NULL, br->dev->dev_addr, vlan->vid);
if (err) {
br_err(br, "failed to insert local address into bridge forwarding table\n");
goto err_fdb_insert;
@@ -1861,90 +1860,6 @@ out_kfree:
kfree_skb(skb);
}
-static int br_vlan_replay_one(struct notifier_block *nb,
- struct net_device *dev,
- struct switchdev_obj_port_vlan *vlan,
- const void *ctx, unsigned long action,
- struct netlink_ext_ack *extack)
-{
- struct switchdev_notifier_port_obj_info obj_info = {
- .info = {
- .dev = dev,
- .extack = extack,
- .ctx = ctx,
- },
- .obj = &vlan->obj,
- };
- int err;
-
- err = nb->notifier_call(nb, action, &obj_info);
- return notifier_to_errno(err);
-}
-
-int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
- const void *ctx, bool adding, struct notifier_block *nb,
- struct netlink_ext_ack *extack)
-{
- struct net_bridge_vlan_group *vg;
- struct net_bridge_vlan *v;
- struct net_bridge_port *p;
- struct net_bridge *br;
- unsigned long action;
- int err = 0;
- u16 pvid;
-
- ASSERT_RTNL();
-
- if (!nb)
- return 0;
-
- if (!netif_is_bridge_master(br_dev))
- return -EINVAL;
-
- if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
- return -EINVAL;
-
- if (netif_is_bridge_master(dev)) {
- br = netdev_priv(dev);
- vg = br_vlan_group(br);
- p = NULL;
- } else {
- p = br_port_get_rtnl(dev);
- if (WARN_ON(!p))
- return -EINVAL;
- vg = nbp_vlan_group(p);
- br = p->br;
- }
-
- if (!vg)
- return 0;
-
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
-
- pvid = br_get_pvid(vg);
-
- list_for_each_entry(v, &vg->vlan_list, vlist) {
- struct switchdev_obj_port_vlan vlan = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .flags = br_vlan_flags(v, pvid),
- .vid = v->vid,
- };
-
- if (!br_vlan_should_use(v))
- continue;
-
- err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack);
- if (err)
- return err;
- }
-
- return err;
-}
-
/* check if v_curr can enter a range ending in range_end */
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end)
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index a7af4eaff17d..1a11064f9990 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -66,7 +66,7 @@ static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
NFPROTO_BRIDGE, s->in, NULL, NULL,
s->net, NULL);
- ret = ebt_do_table(skb, &state, priv);
+ ret = ebt_do_table(priv, skb, &state);
if (ret != NF_DROP)
return ret;
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index c0b121df4a9a..cb949436bc0e 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -58,28 +58,21 @@ static const struct ebt_table frame_filter = {
.me = THIS_MODULE,
};
-static unsigned int
-ebt_filter_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ebt_do_table(skb, state, priv);
-}
-
static const struct nf_hook_ops ebt_ops_filter[] = {
{
- .hook = ebt_filter_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_BR_PRI_FILTER_BRIDGED,
},
{
- .hook = ebt_filter_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_FILTER_BRIDGED,
},
{
- .hook = ebt_filter_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_OUT,
.priority = NF_BR_PRI_FILTER_OTHER,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 4078151c224f..5ee0531ae506 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -58,27 +58,21 @@ static const struct ebt_table frame_nat = {
.me = THIS_MODULE,
};
-static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ebt_do_table(skb, state, priv);
-}
-
static const struct nf_hook_ops ebt_ops_nat[] = {
{
- .hook = ebt_nat_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_OUT,
.priority = NF_BR_PRI_NAT_DST_OTHER,
},
{
- .hook = ebt_nat_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_NAT_SRC,
},
{
- .hook = ebt_nat_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_NAT_DST_BRIDGED,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 83d1798dfbb4..f2dbefb61ce8 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -189,10 +189,10 @@ ebt_get_target_c(const struct ebt_entry *e)
}
/* Do some firewalling */
-unsigned int ebt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct ebt_table *table)
+unsigned int ebt_do_table(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
+ struct ebt_table *table = priv;
unsigned int hook = state->hook;
int i, nentries;
struct ebt_entry *point;
@@ -926,7 +926,9 @@ static int translate_table(struct net *net, const char *name,
return -ENOMEM;
for_each_possible_cpu(i) {
newinfo->chainstack[i] =
- vmalloc(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0]))));
+ vmalloc_node(array_size(udc_cnt,
+ sizeof(*(newinfo->chainstack[0]))),
+ cpu_to_node(i));
if (!newinfo->chainstack[i]) {
while (i)
vfree(newinfo->chainstack[--i]);
@@ -1071,7 +1073,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
*/
if (repl->num_counters &&
copy_to_user(repl->counters, counterstmp,
- repl->num_counters * sizeof(struct ebt_counter))) {
+ array_size(repl->num_counters, sizeof(struct ebt_counter)))) {
/* Silent error, can't fail, new table is already in place */
net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n");
}
@@ -1399,7 +1401,8 @@ static int do_update_counters(struct net *net, const char *name,
goto unlock_mutex;
}
- if (copy_from_user(tmp, counters, num_counters * sizeof(*counters))) {
+ if (copy_from_user(tmp, counters,
+ array_size(num_counters, sizeof(*counters)))) {
ret = -EFAULT;
goto unlock_mutex;
}
@@ -1532,7 +1535,7 @@ static int copy_counters_to_user(struct ebt_table *t,
write_unlock_bh(&t->lock);
if (copy_to_user(user, counterstmp,
- nentries * sizeof(struct ebt_counter)))
+ array_size(nentries, sizeof(struct ebt_counter))))
ret = -EFAULT;
vfree(counterstmp);
return ret;
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index b02e1292f7f1..4be6b04879a1 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -81,7 +81,7 @@ static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
layr->up->ctrlcmd(layr->up, ctrl, layr->id);
}
-static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN],
+static struct cflayer *cfusbl_create(int phyid, const u8 ethaddr[ETH_ALEN],
u8 braddr[ETH_ALEN])
{
struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 37b67194c0df..414dc5671c45 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -53,20 +53,6 @@ struct chnl_net {
enum caif_states state;
};
-static void robust_list_del(struct list_head *delete_node)
-{
- struct list_head *list_node;
- struct list_head *n;
- ASSERT_RTNL();
- list_for_each_safe(list_node, n, &chnl_net_list) {
- if (list_node == delete_node) {
- list_del(list_node);
- return;
- }
- }
- WARN_ON(1);
-}
-
static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
{
struct sk_buff *skb;
@@ -364,6 +350,7 @@ static int chnl_net_init(struct net_device *dev)
ASSERT_RTNL();
priv = netdev_priv(dev);
strncpy(priv->name, dev->name, sizeof(priv->name));
+ INIT_LIST_HEAD(&priv->list_field);
return 0;
}
@@ -372,7 +359,7 @@ static void chnl_net_uninit(struct net_device *dev)
struct chnl_net *priv;
ASSERT_RTNL();
priv = netdev_priv(dev);
- robust_list_del(&priv->list_field);
+ list_del_init(&priv->list_field);
}
static const struct net_device_ops netdev_ops = {
@@ -537,7 +524,7 @@ static void __exit chnl_exit_module(void)
rtnl_lock();
list_for_each_safe(list_node, _tmp, &chnl_net_list) {
dev = list_entry(list_node, struct chnl_net, list_field);
- list_del(list_node);
+ list_del_init(list_node);
delete_device(dev);
}
rtnl_unlock();
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 508f67de0b80..bc88d901a1c0 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -625,7 +625,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer);
if (bcm_rx_thr_flush(op)) {
- hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2);
+ hrtimer_forward_now(hrtimer, op->kt_ival2);
return HRTIMER_RESTART;
} else {
/* rearm throttle handling */
diff --git a/net/can/isotp.c b/net/can/isotp.c
index caaa532ece94..df6968b28bf4 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -121,7 +121,7 @@ enum {
struct tpcon {
int idx;
int len;
- u8 state;
+ u32 state;
u8 bs;
u8 sn;
u8 ll_dl;
@@ -848,6 +848,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct isotp_sock *so = isotp_sk(sk);
+ u32 old_state = so->tx.state;
struct sk_buff *skb;
struct net_device *dev;
struct canfd_frame *cf;
@@ -860,45 +861,55 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return -EADDRNOTAVAIL;
/* we do not support multiple buffers - for now */
- if (so->tx.state != ISOTP_IDLE || wq_has_sleeper(&so->wait)) {
- if (msg->msg_flags & MSG_DONTWAIT)
- return -EAGAIN;
+ if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE ||
+ wq_has_sleeper(&so->wait)) {
+ if (msg->msg_flags & MSG_DONTWAIT) {
+ err = -EAGAIN;
+ goto err_out;
+ }
/* wait for complete transmission of current pdu */
- wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ if (err)
+ goto err_out;
}
- if (!size || size > MAX_MSG_LENGTH)
- return -EINVAL;
+ if (!size || size > MAX_MSG_LENGTH) {
+ err = -EINVAL;
+ goto err_out;
+ }
/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
/* does the given data fit into a single frame for SF_BROADCAST? */
if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
- (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off))
- return -EINVAL;
+ (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
+ err = -EINVAL;
+ goto err_out;
+ }
err = memcpy_from_msg(so->tx.buf, msg, size);
if (err < 0)
- return err;
+ goto err_out;
dev = dev_get_by_index(sock_net(sk), so->ifindex);
- if (!dev)
- return -ENXIO;
+ if (!dev) {
+ err = -ENXIO;
+ goto err_out;
+ }
skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb) {
dev_put(dev);
- return err;
+ goto err_out;
}
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = dev->ifindex;
can_skb_prv(skb)->skbcnt = 0;
- so->tx.state = ISOTP_SENDING;
so->tx.len = size;
so->tx.idx = 0;
@@ -954,15 +965,25 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err) {
pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
__func__, ERR_PTR(err));
- return err;
+ goto err_out;
}
if (wait_tx_done) {
/* wait for complete transmission of current pdu */
wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+
+ if (sk->sk_err)
+ return -sk->sk_err;
}
return size;
+
+err_out:
+ so->tx.state = old_state;
+ if (so->tx.state == ISOTP_IDLE)
+ wake_up_interruptible(&so->wait);
+
+ return err;
}
static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index f6df20808f5e..16af1a7f80f6 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -330,6 +330,7 @@ int j1939_session_activate(struct j1939_session *session);
void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec);
void j1939_session_timers_cancel(struct j1939_session *session);
+#define J1939_MIN_TP_PACKET_SIZE 9
#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff)
#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff)
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 08c8606cfd9c..8452b0fbb78c 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -75,6 +75,13 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX;
/* set default message type */
skcb->addr.type = J1939_TP;
+
+ if (!j1939_address_is_valid(skcb->addr.sa)) {
+ netdev_err_once(priv->ndev, "%s: sa is broadcast address, ignoring!\n",
+ __func__);
+ goto done;
+ }
+
if (j1939_pgn_is_pdu1(skcb->addr.pgn)) {
/* Type 1: with destination address */
skcb->addr.da = skcb->addr.pgn;
@@ -249,11 +256,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
struct j1939_priv *priv, *priv_new;
int ret;
- priv = j1939_priv_get_by_ndev(ndev);
+ spin_lock(&j1939_netdev_lock);
+ priv = j1939_priv_get_by_ndev_locked(ndev);
if (priv) {
kref_get(&priv->rx_kref);
+ spin_unlock(&j1939_netdev_lock);
return priv;
}
+ spin_unlock(&j1939_netdev_lock);
priv = j1939_priv_create(ndev);
if (!priv)
@@ -269,10 +279,10 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
/* Someone was faster than us, use their priv and roll
* back our's.
*/
+ kref_get(&priv_new->rx_kref);
spin_unlock(&j1939_netdev_lock);
dev_put(ndev);
kfree(priv);
- kref_get(&priv_new->rx_kref);
return priv_new;
}
j1939_priv_set(ndev, priv);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index bb5c4b8979be..a271688780a2 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1237,12 +1237,11 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
session->err = -ETIME;
j1939_session_deactivate(session);
} else {
- netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
- __func__, session);
-
j1939_session_list_lock(session->priv);
if (session->state >= J1939_SESSION_ACTIVE &&
session->state < J1939_SESSION_ACTIVE_MAX) {
+ netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
+ __func__, session);
j1939_session_get(session);
hrtimer_start(&session->rxtimer,
ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
@@ -1609,6 +1608,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
abort = J1939_XTP_ABORT_FAULT;
else if (len > priv->tp_max_packet_size)
abort = J1939_XTP_ABORT_RESOURCE;
+ else if (len < J1939_MIN_TP_PACKET_SIZE)
+ abort = J1939_XTP_ABORT_FAULT;
}
if (abort != J1939_XTP_NO_ABORT) {
@@ -1789,6 +1790,7 @@ static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
static void j1939_xtp_rx_dat_one(struct j1939_session *session,
struct sk_buff *skb)
{
+ enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT;
struct j1939_priv *priv = session->priv;
struct j1939_sk_buff_cb *skcb, *se_skcb;
struct sk_buff *se_skb = NULL;
@@ -1803,9 +1805,11 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
skcb = j1939_skb_to_cb(skb);
dat = skb->data;
- if (skb->len <= 1)
+ if (skb->len != 8) {
/* makes no sense */
+ abort = J1939_XTP_ABORT_UNEXPECTED_DATA;
goto out_session_cancel;
+ }
switch (session->last_cmd) {
case 0xff:
@@ -1904,7 +1908,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
out_session_cancel:
kfree_skb(se_skb);
j1939_session_timers_cancel(session);
- j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
+ j1939_session_cancel(session, abort);
j1939_session_put(session);
}
@@ -2019,6 +2023,11 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
extd = J1939_ETP;
fallthrough;
case J1939_TP_CMD_BAM:
+ if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) {
+ netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n",
+ __func__, skcb->addr.sa);
+ return;
+ }
fallthrough;
case J1939_TP_CMD_RTS:
if (skcb->addr.type != extd)
@@ -2081,6 +2090,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
break;
case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+ if (j1939_cb_is_broadcast(skcb)) {
+ netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n",
+ __func__, skcb->addr.sa);
+ return;
+ }
+
if (j1939_tp_im_transmitter(skcb))
j1939_xtp_rx_abort(priv, skb, true);
diff --git a/net/core/Makefile b/net/core/Makefile
index 35ced6201814..4268846f2f47 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_FAILOVER) += failover.o
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
+obj-$(CONFIG_OF) += of_net.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 15ab9ffb27fe..ee290776c661 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -646,7 +646,8 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
skb->truesize += truesize;
if (sk && sk->sk_type == SOCK_STREAM) {
sk_wmem_queued_add(sk, truesize);
- sk_mem_charge(sk, truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_charge(sk, truesize);
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 74fd402d26dd..15ac064b5562 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -140,7 +140,7 @@
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
-#include <linux/netfilter_ingress.h>
+#include <linux/netfilter_netdev.h>
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
@@ -303,6 +303,12 @@ static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
return NULL;
}
+bool netdev_name_in_use(struct net *net, const char *name)
+{
+ return netdev_name_node_lookup(net, name);
+}
+EXPORT_SYMBOL(netdev_name_in_use);
+
int netdev_name_node_alt_create(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
@@ -1133,7 +1139,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
}
snprintf(buf, IFNAMSIZ, name, i);
- if (!__dev_get_by_name(net, buf))
+ if (!netdev_name_in_use(net, buf))
return i;
/* It is possible to run out of possible slots
@@ -1187,7 +1193,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev,
if (strchr(name, '%'))
return dev_alloc_name_ns(net, dev, name);
- else if (__dev_get_by_name(net, name))
+ else if (netdev_name_in_use(net, name))
return -EEXIST;
else if (dev->name != name)
strlcpy(dev->name, name, IFNAMSIZ);
@@ -1290,8 +1296,8 @@ rollback:
old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
- pr_err("%s: name change rollback failed: %d\n",
- dev->name, ret);
+ netdev_err(dev, "name change rollback failed: %d\n",
+ ret);
}
}
@@ -2345,7 +2351,7 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
/* If TC0 is invalidated disable TC mapping */
if (tc->offset + tc->count > txq) {
- pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
+ netdev_warn(dev, "Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
dev->num_tc = 0;
return;
}
@@ -2356,8 +2362,8 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
tc = &dev->tc_to_txq[q];
if (tc->offset + tc->count > txq) {
- pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
- i, q);
+ netdev_warn(dev, "Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
+ i, q);
netdev_set_prio_tc_map(dev, i, 0);
}
}
@@ -2921,6 +2927,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->num_tc)
netif_setup_tc(dev, txq);
+ dev_qdisc_change_real_num_tx(dev, txq);
+
dev->real_num_tx_queues = txq;
if (disabling) {
@@ -3163,6 +3171,12 @@ static u16 skb_tx_hash(const struct net_device *dev,
qoffset = sb_dev->tc_to_txq[tc].offset;
qcount = sb_dev->tc_to_txq[tc].count;
+ if (unlikely(!qcount)) {
+ net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
+ sb_dev->name, qoffset, tc);
+ qoffset = 0;
+ qcount = dev->real_num_tx_queues;
+ }
}
if (skb_rx_queue_recorded(skb)) {
@@ -3408,7 +3422,7 @@ EXPORT_SYMBOL(__skb_gso_segment);
#ifdef CONFIG_BUG
static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
- pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+ netdev_err(dev, "hw csum failure\n");
skb_dump(KERN_ERR, skb, true);
dump_stack();
}
@@ -3906,7 +3920,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
skb->pkt_type = PACKET_LOOPBACK;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (skb->ip_summed == CHECKSUM_NONE)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
WARN_ON(!skb_dst(skb));
skb_dst_force(skb);
netif_rx_ni(skb);
@@ -3918,6 +3933,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
+#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
@@ -3953,6 +3969,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
default:
break;
}
+#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
@@ -4146,13 +4163,20 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
qdisc_pkt_len_init(skb);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_at_ingress = 0;
-# ifdef CONFIG_NET_EGRESS
+#endif
+#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
+ if (nf_hook_egress_active()) {
+ skb = nf_hook_egress(skb, &rc, dev);
+ if (!skb)
+ goto out;
+ }
+ nf_skip_egress(skb, true);
skb = sch_handle_egress(skb, &rc, dev);
if (!skb)
goto out;
+ nf_skip_egress(skb, false);
}
-# endif
#endif
/* If device/qdisc don't need skb->dst, release it right now while
* its hot in this cpu cache.
@@ -5294,6 +5318,7 @@ skip_taps:
if (static_branch_unlikely(&ingress_needed_key)) {
bool another = false;
+ nf_skip_egress(skb, true);
skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
&another);
if (another)
@@ -5301,6 +5326,7 @@ skip_taps:
if (!skb)
goto out;
+ nf_skip_egress(skb, false);
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
goto out;
}
@@ -5837,7 +5863,7 @@ static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int se
gro_normal_list(napi);
}
-static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
__be16 type = skb->protocol;
@@ -5866,12 +5892,11 @@ static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
if (err) {
WARN_ON(&ptype->list == head);
kfree_skb(skb);
- return NET_RX_SUCCESS;
+ return;
}
out:
gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
- return NET_RX_SUCCESS;
}
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
@@ -6898,19 +6923,28 @@ EXPORT_SYMBOL(netif_napi_add);
void napi_disable(struct napi_struct *n)
{
+ unsigned long val, new;
+
might_sleep();
set_bit(NAPI_STATE_DISABLE, &n->state);
- while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
- msleep(1);
- while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
- msleep(1);
+ for ( ; ; ) {
+ val = READ_ONCE(n->state);
+ if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) {
+ usleep_range(20, 200);
+ continue;
+ }
+
+ new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC;
+ new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
+
+ if (cmpxchg(&n->state, val, new) == val)
+ break;
+ }
hrtimer_cancel(&n->timer);
- clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
clear_bit(NAPI_STATE_DISABLE, &n->state);
- clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);
@@ -6923,12 +6957,16 @@ EXPORT_SYMBOL(napi_disable);
*/
void napi_enable(struct napi_struct *n)
{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
- if (n->dev->threaded && n->thread)
- set_bit(NAPI_STATE_THREADED, &n->state);
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
+
+ new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
+ if (n->dev->threaded && n->thread)
+ new |= NAPIF_STATE_THREADED;
+ } while (cmpxchg(&n->state, val, new) != val);
}
EXPORT_SYMBOL(napi_enable);
@@ -6984,8 +7022,8 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
}
if (unlikely(work > weight))
- pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
- n->poll, work, weight);
+ netdev_err_once(n->dev, "NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
+ n->poll, work, weight);
if (likely(work < weight))
return work;
@@ -8539,8 +8577,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
dev->flags &= ~IFF_PROMISC;
else {
dev->promiscuity -= inc;
- pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
- dev->name);
+ netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
return -EOVERFLOW;
}
}
@@ -8610,8 +8647,7 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
dev->flags &= ~IFF_ALLMULTI;
else {
dev->allmulti -= inc;
- pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
- dev->name);
+ netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
return -EOVERFLOW;
}
}
@@ -9148,14 +9184,11 @@ int dev_get_port_parent_id(struct net_device *dev,
}
err = devlink_compat_switch_id_get(dev, ppid);
- if (!err || err != -EOPNOTSUPP)
+ if (!recurse || err != -EOPNOTSUPP)
return err;
- if (!recurse)
- return -EOPNOTSUPP;
-
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = dev_get_port_parent_id(lower_dev, ppid, recurse);
+ err = dev_get_port_parent_id(lower_dev, ppid, true);
if (err)
break;
if (!first.id_len)
@@ -9899,6 +9932,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) {
+ netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
if (features & NETIF_F_HW_TLS_TX) {
bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
@@ -10856,7 +10894,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
- nf_hook_ingress_init(dev);
+ nf_hook_netdev_init(dev);
return dev;
@@ -11142,7 +11180,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
* we can use it in the destination network namespace.
*/
err = -EEXIST;
- if (__dev_get_by_name(net, dev->name)) {
+ if (netdev_name_in_use(net, dev->name)) {
/* We get here if we can't use the current device name */
if (!pat)
goto out;
@@ -11495,7 +11533,7 @@ static void __net_exit default_device_exit(struct net *net)
/* Push remaining network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
- if (__dev_get_by_name(&init_net, fb_name))
+ if (netdev_name_in_use(&init_net, fb_name))
snprintf(fb_name, IFNAMSIZ, "dev%%d");
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 8c39283c26ae..f0cb38344126 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -50,6 +50,11 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
if (addr_len > MAX_ADDR_LEN)
return -EINVAL;
+ ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+ if (ha && !memcmp(addr, ha->addr, addr_len) &&
+ (!addr_type || addr_type == ha->type))
+ goto found_it;
+
while (*ins_point) {
int diff;
@@ -64,6 +69,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
} else if (diff > 0) {
ins_point = &parent->rb_right;
} else {
+found_it:
if (exclusive)
return -EEXIST;
if (global) {
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 0e87237fd871..cbab5fec64b1 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -518,9 +518,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
case SIOCETHTOOL:
dev_load(net, ifr->ifr_name);
- rtnl_lock();
ret = dev_ethtool(net, ifr, data);
- rtnl_unlock();
if (colon)
*colon = ':';
return ret;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a856ae401ea5..5ba4f9434acd 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -30,6 +30,63 @@
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
+ (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
+struct devlink_dev_stats {
+ u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+};
+
+struct devlink {
+ u32 index;
+ struct list_head port_list;
+ struct list_head rate_list;
+ struct list_head sb_list;
+ struct list_head dpipe_table_list;
+ struct list_head resource_list;
+ struct list_head param_list;
+ struct list_head region_list;
+ struct list_head reporter_list;
+ struct mutex reporters_lock; /* protects reporter_list */
+ struct devlink_dpipe_headers *dpipe_headers;
+ struct list_head trap_list;
+ struct list_head trap_group_list;
+ struct list_head trap_policer_list;
+ const struct devlink_ops *ops;
+ u64 features;
+ struct xarray snapshot_ids;
+ struct devlink_dev_stats stats;
+ struct device *dev;
+ possible_net_t _net;
+ /* Serializes access to devlink instance specific objects such as
+ * port, sb, dpipe, resource, params, region, traps and more.
+ */
+ struct mutex lock;
+ u8 reload_failed:1;
+ refcount_t refcount;
+ struct completion comp;
+ char priv[] __aligned(NETDEV_ALIGN);
+};
+
+void *devlink_priv(struct devlink *devlink)
+{
+ return &devlink->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_priv);
+
+struct devlink *priv_to_devlink(void *priv)
+{
+ return container_of(priv, struct devlink, priv);
+}
+EXPORT_SYMBOL_GPL(priv_to_devlink);
+
+struct device *devlink_to_dev(const struct devlink *devlink)
+{
+ return devlink->dev;
+}
+EXPORT_SYMBOL_GPL(devlink_to_dev);
+
static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
{
.name = "destination mac",
@@ -45,7 +102,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
.global = true,
};
-EXPORT_SYMBOL(devlink_dpipe_header_ethernet);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
{
@@ -62,7 +119,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
.global = true,
};
-EXPORT_SYMBOL(devlink_dpipe_header_ipv4);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
{
@@ -79,7 +136,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
.global = true,
};
-EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
@@ -95,6 +152,22 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
#define DEVLINK_REGISTERED XA_MARK_1
+/* devlink instances are open to the access from the user space after
+ * devlink_register() call. Such logical barrier allows us to have certain
+ * expectations related to locking.
+ *
+ * Before *_register() - we are in initialization stage and no parallel
+ * access possible to the devlink instance. All drivers perform that phase
+ * by implicitly holding device_lock.
+ *
+ * After *_register() - users and driver can access devlink instance at
+ * the same time.
+ */
+#define ASSERT_DEVLINK_REGISTERED(d) \
+ WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
+ WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+
/* devlink_mutex
*
* An overall lock guarding every operation coming from userspace.
@@ -109,15 +182,17 @@ struct net *devlink_net(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_net);
-static void devlink_put(struct devlink *devlink)
+void devlink_put(struct devlink *devlink)
{
if (refcount_dec_and_test(&devlink->refcount))
complete(&devlink->comp);
}
-static bool __must_check devlink_try_get(struct devlink *devlink)
+struct devlink *__must_check devlink_try_get(struct devlink *devlink)
{
- return refcount_inc_not_zero(&devlink->refcount);
+ if (refcount_inc_not_zero(&devlink->refcount))
+ return devlink;
+ return NULL;
}
static struct devlink *devlink_get_from_attrs(struct net *net,
@@ -742,6 +817,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
int err;
WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -1040,11 +1116,15 @@ nla_put_failure:
static void devlink_port_notify(struct devlink_port *devlink_port,
enum devlink_command cmd)
{
+ struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
@@ -1055,19 +1135,22 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(devlink_port->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
static void devlink_rate_notify(struct devlink_rate *devlink_rate,
enum devlink_command cmd)
{
+ struct devlink *devlink = devlink_rate->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
@@ -1078,9 +1161,8 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(devlink_rate->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
@@ -3285,7 +3367,7 @@ void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
kfree(value[value_index].mask);
}
}
-EXPORT_SYMBOL(devlink_dpipe_entry_clear);
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
static int devlink_dpipe_entries_fill(struct genl_info *info,
enum devlink_command cmd, int flags,
@@ -3952,9 +4034,6 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
struct net *curr_net;
int err;
- if (!devlink->reload_enabled)
- return -EOPNOTSUPP;
-
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
@@ -4022,7 +4101,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
u32 actions_performed;
int err;
- if (!devlink_reload_supported(devlink->ops))
+ if (!(devlink->features & DEVLINK_F_RELOAD))
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -4150,6 +4229,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -4522,8 +4602,6 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
return -EOPNOTSUPP;
param_value[i] = param_item->driverinit_value;
} else {
- if (!param_item->published)
- continue;
ctx.cmode = i;
err = devlink_param_get(devlink, param, &ctx);
if (err)
@@ -4599,6 +4677,7 @@ static void devlink_param_notify(struct devlink *devlink,
WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
cmd != DEVLINK_CMD_PORT_PARAM_DEL);
+ ASSERT_DEVLINK_REGISTERED(devlink);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -4848,47 +4927,6 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
info, DEVLINK_CMD_PARAM_NEW);
}
-static int devlink_param_register_one(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *param,
- enum devlink_command cmd)
-{
- struct devlink_param_item *param_item;
-
- if (devlink_param_find_by_name(param_list, param->name))
- return -EEXIST;
-
- if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
- WARN_ON(param->get || param->set);
- else
- WARN_ON(!param->get || !param->set);
-
- param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
- if (!param_item)
- return -ENOMEM;
- param_item->param = param;
-
- list_add_tail(&param_item->list, param_list);
- devlink_param_notify(devlink, port_index, param_item, cmd);
- return 0;
-}
-
-static void devlink_param_unregister_one(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *param,
- enum devlink_command cmd)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_name(param_list, param->name);
- WARN_ON(!param_item);
- devlink_param_notify(devlink, port_index, param_item, cmd);
- list_del(&param_item->list);
- kfree(param_item);
-}
-
static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
@@ -5070,6 +5108,11 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
if (err)
goto nla_put_failure;
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
+ region->max_snapshots);
+ if (err)
+ goto nla_put_failure;
+
err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
if (err)
goto nla_put_failure;
@@ -5145,17 +5188,19 @@ static void devlink_nl_region_notify(struct devlink_region *region,
struct devlink_snapshot *snapshot,
enum devlink_command cmd)
{
+ struct devlink *devlink = region->devlink;
struct sk_buff *msg;
WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
if (IS_ERR(msg))
return;
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(region->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
/**
@@ -6269,23 +6314,21 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
return 0;
}
-int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put);
-int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put);
int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
{
@@ -6296,14 +6339,13 @@ int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
}
EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
-int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put);
int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
{
@@ -6923,10 +6965,12 @@ genlmsg_cancel:
static void devlink_recover_notify(struct devlink_health_reporter *reporter,
enum devlink_command cmd)
{
+ struct devlink *devlink = reporter->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -6938,9 +6982,8 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(reporter->devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
void
@@ -8900,6 +8943,25 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
}
/**
+ * devlink_set_features - Set devlink supported features
+ *
+ * @devlink: devlink
+ * @features: devlink support features
+ *
+ * This interface allows us to set reload ops separatelly from
+ * the devlink_alloc.
+ */
+void devlink_set_features(struct devlink *devlink, u64 features)
+{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ WARN_ON(features & DEVLINK_F_RELOAD &&
+ !devlink_reload_supported(devlink->ops));
+ devlink->features = features;
+}
+EXPORT_SYMBOL_GPL(devlink_set_features);
+
+/**
* devlink_alloc_ns - Allocate new devlink instance resources
* in specific namespace
*
@@ -8958,18 +9020,104 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
}
EXPORT_SYMBOL_GPL(devlink_alloc_ns);
+static void
+devlink_trap_policer_notify(struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd);
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd);
+static void devlink_trap_notify(struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd);
+
+static void devlink_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct devlink_trap_group_item *group_item;
+ struct devlink_param_item *param_item;
+ struct devlink_trap_item *trap_item;
+ struct devlink_port *devlink_port;
+ struct devlink_rate *rate_node;
+ struct devlink_region *region;
+
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(devlink_port, &devlink->port_list, list)
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW);
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+ list_for_each_entry(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+
+ list_for_each_entry(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+ list_for_each_entry(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_NEW);
+}
+
+static void devlink_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct devlink_trap_group_item *group_item;
+ struct devlink_param_item *param_item;
+ struct devlink_trap_item *trap_item;
+ struct devlink_port *devlink_port;
+ struct devlink_rate *rate_node;
+ struct devlink_region *region;
+
+ list_for_each_entry_reverse(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_DEL);
+
+ list_for_each_entry_reverse(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+
+ list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+
+ list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+
+ list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+ list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
+ list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_DEL);
+
+ list_for_each_entry_reverse(devlink_port, &devlink->port_list, list)
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+}
+
/**
* devlink_register - Register devlink instance
*
* @devlink: devlink
*/
-int devlink_register(struct devlink *devlink)
+void devlink_register(struct devlink *devlink)
{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ /* Make sure that we are in .probe() routine */
+
mutex_lock(&devlink_mutex);
xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- devlink_notify(devlink, DEVLINK_CMD_NEW);
+ devlink_notify_register(devlink);
mutex_unlock(&devlink_mutex);
- return 0;
}
EXPORT_SYMBOL_GPL(devlink_register);
@@ -8980,60 +9128,28 @@ EXPORT_SYMBOL_GPL(devlink_register);
*/
void devlink_unregister(struct devlink *devlink)
{
+ ASSERT_DEVLINK_REGISTERED(devlink);
+ /* Make sure that we are in .remove() routine */
+
devlink_put(devlink);
wait_for_completion(&devlink->comp);
mutex_lock(&devlink_mutex);
- WARN_ON(devlink_reload_supported(devlink->ops) &&
- devlink->reload_enabled);
- devlink_notify(devlink, DEVLINK_CMD_DEL);
+ devlink_notify_unregister(devlink);
xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
mutex_unlock(&devlink_mutex);
}
EXPORT_SYMBOL_GPL(devlink_unregister);
/**
- * devlink_reload_enable - Enable reload of devlink instance
- *
- * @devlink: devlink
- *
- * Should be called at end of device initialization
- * process when reload operation is supported.
- */
-void devlink_reload_enable(struct devlink *devlink)
-{
- mutex_lock(&devlink_mutex);
- devlink->reload_enabled = true;
- mutex_unlock(&devlink_mutex);
-}
-EXPORT_SYMBOL_GPL(devlink_reload_enable);
-
-/**
- * devlink_reload_disable - Disable reload of devlink instance
- *
- * @devlink: devlink
- *
- * Should be called at the beginning of device cleanup
- * process when reload operation is supported.
- */
-void devlink_reload_disable(struct devlink *devlink)
-{
- mutex_lock(&devlink_mutex);
- /* Mutex is taken which ensures that no reload operation is in
- * progress while setting up forbidded flag.
- */
- devlink->reload_enabled = false;
- mutex_unlock(&devlink_mutex);
-}
-EXPORT_SYMBOL_GPL(devlink_reload_disable);
-
-/**
* devlink_free - Free devlink instance resources
*
* @devlink: devlink
*/
void devlink_free(struct devlink *devlink)
{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
WARN_ON(!list_empty(&devlink->trap_policer_list));
@@ -9939,73 +10055,6 @@ static int devlink_param_verify(const struct devlink_param *param)
return devlink_param_driver_verify(param);
}
-static int __devlink_param_register_one(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *param,
- enum devlink_command reg_cmd)
-{
- int err;
-
- err = devlink_param_verify(param);
- if (err)
- return err;
-
- return devlink_param_register_one(devlink, port_index,
- param_list, param, reg_cmd);
-}
-
-static int __devlink_params_register(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *params,
- size_t params_count,
- enum devlink_command reg_cmd,
- enum devlink_command unreg_cmd)
-{
- const struct devlink_param *param = params;
- int i;
- int err;
-
- mutex_lock(&devlink->lock);
- for (i = 0; i < params_count; i++, param++) {
- err = __devlink_param_register_one(devlink, port_index,
- param_list, param, reg_cmd);
- if (err)
- goto rollback;
- }
-
- mutex_unlock(&devlink->lock);
- return 0;
-
-rollback:
- if (!i)
- goto unlock;
- for (param--; i > 0; i--, param--)
- devlink_param_unregister_one(devlink, port_index, param_list,
- param, unreg_cmd);
-unlock:
- mutex_unlock(&devlink->lock);
- return err;
-}
-
-static void __devlink_params_unregister(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *params,
- size_t params_count,
- enum devlink_command cmd)
-{
- const struct devlink_param *param = params;
- int i;
-
- mutex_lock(&devlink->lock);
- for (i = 0; i < params_count; i++, param++)
- devlink_param_unregister_one(devlink, 0, param_list, param,
- cmd);
- mutex_unlock(&devlink->lock);
-}
-
/**
* devlink_params_register - register configuration parameters
*
@@ -10019,10 +10068,25 @@ int devlink_params_register(struct devlink *devlink,
const struct devlink_param *params,
size_t params_count)
{
- return __devlink_params_register(devlink, 0, &devlink->param_list,
- params, params_count,
- DEVLINK_CMD_PARAM_NEW,
- DEVLINK_CMD_PARAM_DEL);
+ const struct devlink_param *param = params;
+ int i, err;
+
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ for (i = 0; i < params_count; i++, param++) {
+ err = devlink_param_register(devlink, param);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ if (!i)
+ return err;
+
+ for (param--; i > 0; i--, param--)
+ devlink_param_unregister(devlink, param);
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_params_register);
@@ -10036,9 +10100,13 @@ void devlink_params_unregister(struct devlink *devlink,
const struct devlink_param *params,
size_t params_count)
{
- return __devlink_params_unregister(devlink, 0, &devlink->param_list,
- params, params_count,
- DEVLINK_CMD_PARAM_DEL);
+ const struct devlink_param *param = params;
+ int i;
+
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ for (i = 0; i < params_count; i++, param++)
+ devlink_param_unregister(devlink, param);
}
EXPORT_SYMBOL_GPL(devlink_params_unregister);
@@ -10054,13 +10122,26 @@ EXPORT_SYMBOL_GPL(devlink_params_unregister);
int devlink_param_register(struct devlink *devlink,
const struct devlink_param *param)
{
- int err;
+ struct devlink_param_item *param_item;
- mutex_lock(&devlink->lock);
- err = __devlink_param_register_one(devlink, 0, &devlink->param_list,
- param, DEVLINK_CMD_PARAM_NEW);
- mutex_unlock(&devlink->lock);
- return err;
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ WARN_ON(devlink_param_verify(param));
+ WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name));
+
+ if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+ WARN_ON(param->get || param->set);
+ else
+ WARN_ON(!param->get || !param->set);
+
+ param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+ if (!param_item)
+ return -ENOMEM;
+
+ param_item->param = param;
+
+ list_add_tail(&param_item->list, &devlink->param_list);
+ return 0;
}
EXPORT_SYMBOL_GPL(devlink_param_register);
@@ -10072,152 +10153,38 @@ EXPORT_SYMBOL_GPL(devlink_param_register);
void devlink_param_unregister(struct devlink *devlink,
const struct devlink_param *param)
{
- mutex_lock(&devlink->lock);
- devlink_param_unregister_one(devlink, 0, &devlink->param_list, param,
- DEVLINK_CMD_PARAM_DEL);
- mutex_unlock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devlink_param_unregister);
-
-/**
- * devlink_params_publish - publish configuration parameters
- *
- * @devlink: devlink
- *
- * Publish previously registered configuration parameters.
- */
-void devlink_params_publish(struct devlink *devlink)
-{
- struct devlink_param_item *param_item;
-
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (param_item->published)
- continue;
- param_item->published = true;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
- }
-}
-EXPORT_SYMBOL_GPL(devlink_params_publish);
-
-/**
- * devlink_params_unpublish - unpublish configuration parameters
- *
- * @devlink: devlink
- *
- * Unpublish previously registered configuration parameters.
- */
-void devlink_params_unpublish(struct devlink *devlink)
-{
struct devlink_param_item *param_item;
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (!param_item->published)
- continue;
- param_item->published = false;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
- }
-}
-EXPORT_SYMBOL_GPL(devlink_params_unpublish);
-
-/**
- * devlink_param_publish - publish one configuration parameter
- *
- * @devlink: devlink
- * @param: one configuration parameter
- *
- * Publish previously registered configuration parameter.
- */
-void devlink_param_publish(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (param_item->param != param || param_item->published)
- continue;
- param_item->published = true;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
- break;
- }
+ param_item =
+ devlink_param_find_by_name(&devlink->param_list, param->name);
+ WARN_ON(!param_item);
+ list_del(&param_item->list);
+ kfree(param_item);
}
-EXPORT_SYMBOL_GPL(devlink_param_publish);
+EXPORT_SYMBOL_GPL(devlink_param_unregister);
/**
- * devlink_param_unpublish - unpublish one configuration parameter
+ * devlink_param_driverinit_value_get - get configuration parameter
+ * value for driver initializing
*
- * @devlink: devlink
- * @param: one configuration parameter
+ * @devlink: devlink
+ * @param_id: parameter ID
+ * @init_val: value of parameter in driverinit configuration mode
*
- * Unpublish previously registered configuration parameter.
+ * This function should be used by the driver to get driverinit
+ * configuration for initialization after reload command.
*/
-void devlink_param_unpublish(struct devlink *devlink,
- const struct devlink_param *param)
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+ union devlink_param_value *init_val)
{
struct devlink_param_item *param_item;
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (param_item->param != param || !param_item->published)
- continue;
- param_item->published = false;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
- break;
- }
-}
-EXPORT_SYMBOL_GPL(devlink_param_unpublish);
-
-/**
- * devlink_port_params_register - register port configuration parameters
- *
- * @devlink_port: devlink port
- * @params: configuration parameters array
- * @params_count: number of parameters provided
- *
- * Register the configuration parameters supported by the port.
- */
-int devlink_port_params_register(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count)
-{
- return __devlink_params_register(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list, params,
- params_count,
- DEVLINK_CMD_PORT_PARAM_NEW,
- DEVLINK_CMD_PORT_PARAM_DEL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_params_register);
-
-/**
- * devlink_port_params_unregister - unregister port configuration
- * parameters
- *
- * @devlink_port: devlink port
- * @params: configuration parameters array
- * @params_count: number of parameters provided
- */
-void devlink_port_params_unregister(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count)
-{
- return __devlink_params_unregister(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list,
- params, params_count,
- DEVLINK_CMD_PORT_PARAM_DEL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_params_unregister);
-
-static int
-__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
- union devlink_param_value *init_val)
-{
- struct devlink_param_item *param_item;
+ if (!devlink_reload_supported(devlink->ops))
+ return -EOPNOTSUPP;
- param_item = devlink_param_find_by_id(param_list, param_id);
+ param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
if (!param_item)
return -EINVAL;
@@ -10233,54 +10200,6 @@ __devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
return 0;
}
-
-static int
-__devlink_param_driverinit_value_set(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list, u32 param_id,
- union devlink_param_value init_val,
- enum devlink_command cmd)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(param_list, param_id);
- if (!param_item)
- return -EINVAL;
-
- if (!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT))
- return -EOPNOTSUPP;
-
- if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(param_item->driverinit_value.vstr, init_val.vstr);
- else
- param_item->driverinit_value = init_val;
- param_item->driverinit_value_valid = true;
-
- devlink_param_notify(devlink, port_index, param_item, cmd);
- return 0;
-}
-
-/**
- * devlink_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
- *
- * @devlink: devlink
- * @param_id: parameter ID
- * @init_val: value of parameter in driverinit configuration mode
- *
- * This function should be used by the driver to get driverinit
- * configuration for initialization after reload command.
- */
-int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *init_val)
-{
- if (!devlink_reload_supported(devlink->ops))
- return -EOPNOTSUPP;
-
- return __devlink_param_driverinit_value_get(&devlink->param_list,
- param_id, init_val);
-}
EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
/**
@@ -10298,61 +10217,26 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
union devlink_param_value init_val)
{
- return __devlink_param_driverinit_value_set(devlink, 0,
- &devlink->param_list,
- param_id, init_val,
- DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
+ struct devlink_param_item *param_item;
-/**
- * devlink_port_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- * @init_val: value of parameter in driverinit configuration mode
- *
- * This function should be used by the driver to get driverinit
- * configuration for initialization after reload command.
- */
-int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value *init_val)
-{
- struct devlink *devlink = devlink_port->devlink;
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
- if (!devlink_reload_supported(devlink->ops))
- return -EOPNOTSUPP;
+ param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+ if (!param_item)
+ return -EINVAL;
- return __devlink_param_driverinit_value_get(&devlink_port->param_list,
- param_id, init_val);
-}
-EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get);
+ if (!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT))
+ return -EOPNOTSUPP;
-/**
- * devlink_port_param_driverinit_value_set - set value of configuration
- * parameter for driverinit
- * configuration mode
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- * @init_val: value of parameter to set for driverinit configuration mode
- *
- * This function should be used by the driver to set driverinit
- * configuration mode default value.
- */
-int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value init_val)
-{
- return __devlink_param_driverinit_value_set(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list,
- param_id, init_val,
- DEVLINK_CMD_PORT_PARAM_NEW);
+ if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
+ strcpy(param_item->driverinit_value.vstr, init_val.vstr);
+ else
+ param_item->driverinit_value = init_val;
+ param_item->driverinit_value_valid = true;
+ return 0;
}
-EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set);
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
/**
* devlink_param_value_changed - notify devlink on a parameter's value
@@ -10378,50 +10262,6 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
EXPORT_SYMBOL_GPL(devlink_param_value_changed);
/**
- * devlink_port_param_value_changed - notify devlink on a parameter's value
- * change. Should be called by the driver
- * right after the change.
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- *
- * This function should be used by the driver to notify devlink on value
- * change, excluding driverinit configuration mode.
- * For driverinit configuration mode driver should use the function
- * devlink_port_param_driverinit_value_set() instead.
- */
-void devlink_port_param_value_changed(struct devlink_port *devlink_port,
- u32 param_id)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(&devlink_port->param_list,
- param_id);
- WARN_ON(!param_item);
-
- devlink_param_notify(devlink_port->devlink, devlink_port->index,
- param_item, DEVLINK_CMD_PORT_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devlink_port_param_value_changed);
-
-/**
- * devlink_param_value_str_fill - Safely fill-up the string preventing
- * from overflow of the preallocated buffer
- *
- * @dst_val: destination devlink_param_value
- * @src: source buffer
- */
-void devlink_param_value_str_fill(union devlink_param_value *dst_val,
- const char *src)
-{
- size_t len;
-
- len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE);
- WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE);
-}
-EXPORT_SYMBOL_GPL(devlink_param_value_str_fill);
-
-/**
* devlink_region_create - create a new address region
*
* @devlink: devlink
@@ -10839,6 +10679,8 @@ devlink_trap_group_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -10880,6 +10722,8 @@ static void devlink_trap_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
cmd != DEVLINK_CMD_TRAP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -11261,6 +11105,8 @@ devlink_trap_policer_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -11429,45 +11275,36 @@ free_msg:
nlmsg_free(msg);
}
-void devlink_compat_running_version(struct net_device *dev,
- char *buf, size_t len)
+static struct devlink_port *netdev_to_devlink_port(struct net_device *dev)
{
- struct devlink *devlink;
+ if (!dev->netdev_ops->ndo_get_devlink_port)
+ return NULL;
- dev_hold(dev);
- rtnl_unlock();
+ return dev->netdev_ops->ndo_get_devlink_port(dev);
+}
- devlink = netdev_to_devlink(dev);
- if (!devlink || !devlink->ops->info_get)
- goto out;
+void devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ if (!devlink->ops->info_get)
+ return;
mutex_lock(&devlink->lock);
__devlink_compat_running_version(devlink, buf, len);
mutex_unlock(&devlink->lock);
-
-out:
- rtnl_lock();
- dev_put(dev);
}
-int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
+int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
{
struct devlink_flash_update_params params = {};
- struct devlink *devlink;
int ret;
- dev_hold(dev);
- rtnl_unlock();
-
- devlink = netdev_to_devlink(dev);
- if (!devlink || !devlink->ops->flash_update) {
- ret = -EOPNOTSUPP;
- goto out;
- }
+ if (!devlink->ops->flash_update)
+ return -EOPNOTSUPP;
ret = request_firmware(&params.fw, file_name, devlink->dev);
if (ret)
- goto out;
+ return ret;
mutex_lock(&devlink->lock);
devlink_flash_update_begin_notify(devlink);
@@ -11477,10 +11314,6 @@ int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
release_firmware(params.fw);
-out:
- rtnl_lock();
- dev_put(dev);
-
return ret;
}
@@ -11538,7 +11371,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
if (!net_eq(devlink_net(devlink), net))
goto retry;
- WARN_ON(!devlink_reload_supported(devlink->ops));
+ WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e32cee2c469..e471c9b09670 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7765,6 +7765,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
break;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
+ if (type == BPF_WRITE || size != sizeof(__u64))
+ return false;
+ break;
case bpf_ctx_range(struct __sk_buff, tstamp):
if (size != sizeof(__u64))
return false;
@@ -7774,6 +7778,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
break;
+ case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1:
+ /* Explicitly prohibit access to padding in __sk_buff. */
+ return false;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@@ -7802,6 +7809,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -7872,6 +7880,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -8373,6 +8382,7 @@ static bool sk_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -8884,6 +8894,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, sk));
break;
+ case offsetof(struct __sk_buff, hwtstamp):
+ BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8);
+ BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0);
+
+ insn = bpf_convert_shinfo_access(si, insn);
+ *insn++ = BPF_LDX_MEM(BPF_DW,
+ si->dst_reg, si->dst_reg,
+ bpf_target_off(struct skb_shared_info,
+ hwtstamps, 8,
+ target_size));
+ break;
}
return insn - insn_buf;
@@ -9735,22 +9756,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
struct bpf_insn *insn)
{
- /* si->dst_reg = skb->data */
+ int reg;
+ int temp_reg_off = offsetof(struct sk_buff, cb) +
+ offsetof(struct sk_skb_cb, temp_reg);
+
+ if (si->src_reg == si->dst_reg) {
+ /* We need an extra register, choose and save a register. */
+ reg = BPF_REG_9;
+ if (si->src_reg == reg || si->dst_reg == reg)
+ reg--;
+ if (si->src_reg == reg || si->dst_reg == reg)
+ reg--;
+ *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
+ } else {
+ reg = si->dst_reg;
+ }
+
+ /* reg = skb->data */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
- si->dst_reg, si->src_reg,
+ reg, si->src_reg,
offsetof(struct sk_buff, data));
/* AX = skb->len */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, len));
- /* si->dst_reg = skb->data + skb->len */
- *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+ /* reg = skb->data + skb->len */
+ *insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
/* AX = skb->data_len */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, data_len));
- /* si->dst_reg = skb->data + skb->len - skb->data_len */
- *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
+
+ /* reg = skb->data + skb->len - skb->data_len */
+ *insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
+
+ if (si->src_reg == si->dst_reg) {
+ /* Restore the saved register */
+ *insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
+ *insn++ = BPF_MOV64_REG(si->dst_reg, reg);
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
+ }
return insn;
}
@@ -9761,11 +9806,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
+ int off;
switch (si->off) {
case offsetof(struct __sk_buff, data_end):
insn = bpf_convert_data_end_access(si, insn);
break;
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetofend(struct __sk_buff, cb[4]) - 1:
+ BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20);
+ BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
+ offsetof(struct sk_skb_cb, data)) %
+ sizeof(__u64));
+
+ prog->cb_access = 1;
+ off = si->off;
+ off -= offsetof(struct __sk_buff, cb[0]);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct sk_skb_cb, data);
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
+ si->src_reg, off);
+ else
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
+ si->src_reg, off);
+ break;
+
+
default:
return bpf_convert_ctx_access(type, si, insn_buf, prog,
target_size);
@@ -10402,8 +10469,10 @@ BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
return -EINVAL;
if (unlikely(sk && sk_is_refcounted(sk)))
return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
- if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
- return -ESOCKTNOSUPPORT; /* reject connected sockets */
+ if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
+ return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */
+ if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
+ return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */
/* Check if socket is suitable for packet L3/L4 protocol */
if (sk && sk->sk_protocol != ctx->protocol)
@@ -10702,6 +10771,26 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
+BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk)
+{
+ /* unix_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct unix_sock);
+ if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
+ .func = bpf_skc_to_unix_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
+};
+
BPF_CALL_1(bpf_sock_from_file, struct file *, file)
{
return (unsigned long)sock_from_file(file);
@@ -10741,6 +10830,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skc_to_udp6_sock:
func = &bpf_skc_to_udp6_sock_proto;
break;
+ case BPF_FUNC_skc_to_unix_sock:
+ func = &bpf_skc_to_unix_sock_proto;
+ break;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index bac0184cf3de..3255f57f5131 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1196,9 +1196,8 @@ proto_again:
break;
}
- proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
- switch (proto) {
+ switch (hdr->proto) {
case htons(PPP_IP):
proto = htons(ETH_P_IP);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
@@ -1307,6 +1306,11 @@ ip_proto_again:
switch (ip_proto) {
case IPPROTO_GRE:
+ if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
target_container, data,
&proto, &nhoff, &hlen, flags);
@@ -1364,6 +1368,11 @@ ip_proto_again:
break;
}
case IPPROTO_IPIP:
+ if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
proto = htons(ETH_P_IP);
key_control->flags |= FLOW_DIS_ENCAPSULATION;
@@ -1376,6 +1385,11 @@ ip_proto_again:
break;
case IPPROTO_IPV6:
+ if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
proto = htons(ETH_P_IPV6);
key_control->flags |= FLOW_DIS_ENCAPSULATION;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 8e582e29a41e..4fcbdd71c59f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -40,10 +40,10 @@
*/
struct net_rate_estimator {
- struct gnet_stats_basic_packed *bstats;
+ struct gnet_stats_basic_sync *bstats;
spinlock_t *stats_lock;
- seqcount_t *running;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ bool running;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
u8 ewma_log;
u8 intvl_log; /* period : (250ms << intvl_log) */
@@ -60,13 +60,13 @@ struct net_rate_estimator {
};
static void est_fetch_counters(struct net_rate_estimator *e,
- struct gnet_stats_basic_packed *b)
+ struct gnet_stats_basic_sync *b)
{
- memset(b, 0, sizeof(*b));
+ gnet_stats_basic_sync_init(b);
if (e->stats_lock)
spin_lock(e->stats_lock);
- __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
+ gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
if (e->stats_lock)
spin_unlock(e->stats_lock);
@@ -76,14 +76,18 @@ static void est_fetch_counters(struct net_rate_estimator *e,
static void est_timer(struct timer_list *t)
{
struct net_rate_estimator *est = from_timer(est, t, timer);
- struct gnet_stats_basic_packed b;
+ struct gnet_stats_basic_sync b;
+ u64 b_bytes, b_packets;
u64 rate, brate;
est_fetch_counters(est, &b);
- brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
+ b_bytes = u64_stats_read(&b.bytes);
+ b_packets = u64_stats_read(&b.packets);
+
+ brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
- rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
+ rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
write_seqcount_begin(&est->seq);
@@ -91,8 +95,8 @@ static void est_timer(struct timer_list *t)
est->avpps += rate;
write_seqcount_end(&est->seq);
- est->last_bytes = b.bytes;
- est->last_packets = b.packets;
+ est->last_bytes = b_bytes;
+ est->last_packets = b_packets;
est->next_jiffies += ((HZ/4) << est->intvl_log);
@@ -109,7 +113,9 @@ static void est_timer(struct timer_list *t)
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @lock: lock for statistics and control path
- * @running: qdisc running seqcount
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ * internal values might change during basic reads. Only used
+ * if @bstats_cpu is NULL
* @opt: rate estimator configuration TLV
*
* Creates a new rate estimator with &bstats as source and &rate_est
@@ -121,16 +127,16 @@ static void est_timer(struct timer_list *t)
* Returns 0 on success or a negative error code.
*
*/
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
- seqcount_t *running,
+ bool running,
struct nlattr *opt)
{
struct gnet_estimator *parm = nla_data(opt);
struct net_rate_estimator *old, *est;
- struct gnet_stats_basic_packed b;
+ struct gnet_stats_basic_sync b;
int intvl_log;
if (nla_len(opt) < sizeof(*parm))
@@ -164,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
est_fetch_counters(est, &b);
if (lock)
local_bh_enable();
- est->last_bytes = b.bytes;
- est->last_packets = b.packets;
+ est->last_bytes = u64_stats_read(&b.bytes);
+ est->last_packets = u64_stats_read(&b.packets);
if (lock)
spin_lock_bh(lock);
@@ -214,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @lock: lock for statistics and control path
- * @running: qdisc running seqcount (might be NULL)
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ * internal values might change during basic reads. Only used
+ * if @cpu_bstats is NULL
* @opt: rate estimator configuration TLV
*
* Replaces the configuration of a rate estimator by calling
@@ -222,11 +230,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
*
* Returns 0 on success or a negative error code.
*/
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
- seqcount_t *running, struct nlattr *opt)
+ bool running, struct nlattr *opt)
{
return gen_new_estimator(bstats, cpu_bstats, rate_est,
lock, running, opt);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index e491b083b348..a10335b4ba2d 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -18,7 +18,7 @@
#include <linux/gen_stats.h>
#include <net/netlink.h>
#include <net/gen_stats.h>
-
+#include <net/sch_generic.h>
static inline int
gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
@@ -114,63 +114,112 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
}
EXPORT_SYMBOL(gnet_stats_start_copy);
-static void
-__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu)
+/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
{
+ u64_stats_set(&b->bytes, 0);
+ u64_stats_set(&b->packets, 0);
+ u64_stats_init(&b->syncp);
+}
+EXPORT_SYMBOL(gnet_stats_basic_sync_init);
+
+static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu)
+{
+ u64 t_bytes = 0, t_packets = 0;
int i;
for_each_possible_cpu(i) {
- struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
+ struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
unsigned int start;
u64 bytes, packets;
do {
start = u64_stats_fetch_begin_irq(&bcpu->syncp);
- bytes = bcpu->bstats.bytes;
- packets = bcpu->bstats.packets;
+ bytes = u64_stats_read(&bcpu->bytes);
+ packets = u64_stats_read(&bcpu->packets);
} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
- bstats->bytes += bytes;
- bstats->packets += packets;
+ t_bytes += bytes;
+ t_packets += packets;
+ }
+ _bstats_update(bstats, t_bytes, t_packets);
+}
+
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running)
+{
+ unsigned int start;
+ u64 bytes = 0;
+ u64 packets = 0;
+
+ WARN_ON_ONCE((cpu || running) && in_hardirq());
+
+ if (cpu) {
+ gnet_stats_add_basic_cpu(bstats, cpu);
+ return;
}
+ do {
+ if (running)
+ start = u64_stats_fetch_begin_irq(&b->syncp);
+ bytes = u64_stats_read(&b->bytes);
+ packets = u64_stats_read(&b->packets);
+ } while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
+
+ _bstats_update(bstats, bytes, packets);
}
+EXPORT_SYMBOL(gnet_stats_add_basic);
-void
-__gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
+static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running)
{
- unsigned int seq;
+ unsigned int start;
if (cpu) {
- __gnet_stats_copy_basic_cpu(bstats, cpu);
+ u64 t_bytes = 0, t_packets = 0;
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
+ unsigned int start;
+ u64 bytes, packets;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&bcpu->syncp);
+ bytes = u64_stats_read(&bcpu->bytes);
+ packets = u64_stats_read(&bcpu->packets);
+ } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+
+ t_bytes += bytes;
+ t_packets += packets;
+ }
+ *ret_bytes = t_bytes;
+ *ret_packets = t_packets;
return;
}
do {
if (running)
- seq = read_seqcount_begin(running);
- bstats->bytes = b->bytes;
- bstats->packets = b->packets;
- } while (running && read_seqcount_retry(running, seq));
+ start = u64_stats_fetch_begin_irq(&b->syncp);
+ *ret_bytes = u64_stats_read(&b->bytes);
+ *ret_packets = u64_stats_read(&b->packets);
+ } while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
}
-EXPORT_SYMBOL(__gnet_stats_copy_basic);
static int
-___gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b,
- int type)
+___gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b,
+ int type, bool running)
{
- struct gnet_stats_basic_packed bstats = {0};
+ u64 bstats_bytes, bstats_packets;
- __gnet_stats_copy_basic(running, &bstats, cpu, b);
+ gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running);
if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
- d->tc_stats.bytes = bstats.bytes;
- d->tc_stats.packets = bstats.packets;
+ d->tc_stats.bytes = bstats_bytes;
+ d->tc_stats.packets = bstats_packets;
}
if (d->tail) {
@@ -178,24 +227,28 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
int res;
memset(&sb, 0, sizeof(sb));
- sb.bytes = bstats.bytes;
- sb.packets = bstats.packets;
+ sb.bytes = bstats_bytes;
+ sb.packets = bstats_packets;
res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
- if (res < 0 || sb.packets == bstats.packets)
+ if (res < 0 || sb.packets == bstats_packets)
return res;
/* emit 64bit stats only if needed */
- return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
- sizeof(bstats.packets), TCA_STATS_PAD);
+ return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets,
+ sizeof(bstats_packets), TCA_STATS_PAD);
}
return 0;
}
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
- * @running: seqcount_t pointer
* @d: dumping handle
* @cpu: copy statistic per cpu
* @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ * internal values might change during basic reads.
+ * Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
*
* Appends the basic statistics to the top level TLV created by
* gnet_stats_start_copy().
@@ -204,22 +257,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b,
+ bool running)
{
- return ___gnet_stats_copy_basic(running, d, cpu, b,
- TCA_STATS_BASIC);
+ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
}
EXPORT_SYMBOL(gnet_stats_copy_basic);
/**
* gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
- * @running: seqcount_t pointer
* @d: dumping handle
* @cpu: copy statistic per cpu
* @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ * internal values might change during basic reads.
+ * Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
*
* Appends the basic statistics to the top level TLV created by
* gnet_stats_start_copy().
@@ -228,13 +284,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_basic_hw(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b,
+ bool running)
{
- return ___gnet_stats_copy_basic(running, d, cpu, b,
- TCA_STATS_BASIC_HW);
+ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
}
EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
@@ -282,16 +337,15 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
}
EXPORT_SYMBOL(gnet_stats_copy_rate_est);
-static void
-__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *q)
+static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *q)
{
int i;
for_each_possible_cpu(i) {
const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
- qstats->qlen = 0;
+ qstats->qlen += qcpu->backlog;
qstats->backlog += qcpu->backlog;
qstats->drops += qcpu->drops;
qstats->requeues += qcpu->requeues;
@@ -299,24 +353,21 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
}
}
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *cpu,
- const struct gnet_stats_queue *q,
- __u32 qlen)
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu,
+ const struct gnet_stats_queue *q)
{
if (cpu) {
- __gnet_stats_copy_queue_cpu(qstats, cpu);
+ gnet_stats_add_queue_cpu(qstats, cpu);
} else {
- qstats->qlen = q->qlen;
- qstats->backlog = q->backlog;
- qstats->drops = q->drops;
- qstats->requeues = q->requeues;
- qstats->overlimits = q->overlimits;
+ qstats->qlen += q->qlen;
+ qstats->backlog += q->backlog;
+ qstats->drops += q->drops;
+ qstats->requeues += q->requeues;
+ qstats->overlimits += q->overlimits;
}
-
- qstats->qlen = qlen;
}
-EXPORT_SYMBOL(__gnet_stats_copy_queue);
+EXPORT_SYMBOL(gnet_stats_add_queue);
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -339,7 +390,8 @@ gnet_stats_copy_queue(struct gnet_dump *d,
{
struct gnet_stats_queue qstats = {0};
- __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen);
+ gnet_stats_add_queue(&qstats, cpu_q, q);
+ qstats.qlen = qlen;
if (d->compat_tc_stats) {
d->tc_stats.drops = qstats.drops;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2d5bc3a75fae..47931c8be04b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
list_del_init(&n->gc_list);
atomic_dec(&n->tbl->gc_entries);
}
+ if (!list_empty(&n->managed_list))
+ list_del_init(&n->managed_list);
}
static void neigh_update_gc_list(struct neighbour *n)
@@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
write_lock_bh(&n->tbl->lock);
write_lock(&n->lock);
-
if (n->dead)
goto out;
@@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
atomic_inc(&n->tbl->gc_entries);
}
+out:
+ write_unlock(&n->lock);
+ write_unlock_bh(&n->tbl->lock);
+}
+static void neigh_update_managed_list(struct neighbour *n)
+{
+ bool on_managed_list, add_to_managed;
+
+ write_lock_bh(&n->tbl->lock);
+ write_lock(&n->lock);
+ if (n->dead)
+ goto out;
+
+ add_to_managed = n->flags & NTF_MANAGED;
+ on_managed_list = !list_empty(&n->managed_list);
+
+ if (!add_to_managed && on_managed_list)
+ list_del_init(&n->managed_list);
+ else if (add_to_managed && !on_managed_list)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
write_unlock(&n->lock);
write_unlock_bh(&n->tbl->lock);
}
-static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
- int *notify)
+static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
+ bool *gc_update, bool *managed_update)
{
- bool rc = false;
- u8 ndm_flags;
+ u32 ndm_flags, old_flags = neigh->flags;
if (!(flags & NEIGH_UPDATE_F_ADMIN))
- return rc;
+ return;
+
+ ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
- ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
- if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
if (ndm_flags & NTF_EXT_LEARNED)
neigh->flags |= NTF_EXT_LEARNED;
else
neigh->flags &= ~NTF_EXT_LEARNED;
- rc = true;
*notify = 1;
+ *gc_update = true;
+ }
+ if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
+ if (ndm_flags & NTF_MANAGED)
+ neigh->flags |= NTF_MANAGED;
+ else
+ neigh->flags &= ~NTF_MANAGED;
+ *notify = 1;
+ *managed_update = true;
}
-
- return rc;
}
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
@@ -379,7 +407,7 @@ EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
struct net_device *dev,
- bool exempt_from_gc)
+ u32 flags, bool exempt_from_gc)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
@@ -412,6 +440,7 @@ do_alloc:
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
+ n->flags = flags;
seqlock_init(&n->hh.hh_lock);
n->parms = neigh_parms_clone(&tbl->parms);
timer_setup(&n->timer, neigh_timer_handler, 0);
@@ -421,6 +450,7 @@ do_alloc:
refcount_set(&n->refcnt, 1);
n->dead = 1;
INIT_LIST_HEAD(&n->gc_list);
+ INIT_LIST_HEAD(&n->managed_list);
atomic_inc(&tbl->entries);
out:
@@ -575,19 +605,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
}
EXPORT_SYMBOL(neigh_lookup_nodev);
-static struct neighbour *___neigh_create(struct neigh_table *tbl,
- const void *pkey,
- struct net_device *dev,
- bool exempt_from_gc, bool want_ref)
+static struct neighbour *
+___neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, u32 flags,
+ bool exempt_from_gc, bool want_ref)
{
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
- u32 hash_val;
- unsigned int key_len = tbl->key_len;
- int error;
+ u32 hash_val, key_len = tbl->key_len;
+ struct neighbour *n1, *rc, *n;
struct neigh_hash_table *nht;
+ int error;
+ n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
-
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
@@ -650,7 +679,8 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
n->dead = 0;
if (!exempt_from_gc)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
-
+ if (n->flags & NTF_MANAGED)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
if (want_ref)
neigh_hold(n);
rcu_assign_pointer(n->next,
@@ -674,7 +704,7 @@ out_neigh_release:
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
- return ___neigh_create(tbl, pkey, dev, false, want_ref);
+ return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
@@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
}
}
-
-
/* Generic update routine.
-- lladdr is new lladdr or NULL, if it is not supplied.
-- new is new state.
@@ -1217,7 +1245,8 @@ static void neigh_update_hhs(struct neighbour *neigh)
lladdr instead of overriding it
if it is different.
NEIGH_UPDATE_F_ADMIN means that the change is administrative.
-
+ NEIGH_UPDATE_F_USE means that the entry is user triggered.
+ NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
NTF_ROUTER flag.
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
@@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
Caller MUST hold reference count on the entry.
*/
-
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
u8 new, u32 flags, u32 nlmsg_pid,
struct netlink_ext_ack *extack)
{
- bool ext_learn_change = false;
- u8 old;
- int err;
- int notify = 0;
- struct net_device *dev;
+ bool gc_update = false, managed_update = false;
int update_isrouter = 0;
+ struct net_device *dev;
+ int err, notify = 0;
+ u8 old;
trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
@@ -1254,7 +1281,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
(old & (NUD_NOARP | NUD_PERMANENT)))
goto out;
- ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
+ neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
+ if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
+ new = old & ~NUD_PERMANENT;
+ neigh->nud_state = new;
+ err = 0;
+ goto out;
+ }
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
@@ -1399,15 +1432,13 @@ out:
if (update_isrouter)
neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
-
- if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+ if (((new ^ old) & NUD_PERMANENT) || gc_update)
neigh_update_gc_list(neigh);
-
+ if (managed_update)
+ neigh_update_managed_list(neigh);
if (notify)
neigh_update_notify(neigh, nlmsg_pid);
-
trace_neigh_update_done(neigh, err);
-
return err;
}
@@ -1533,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
}
EXPORT_SYMBOL(neigh_direct_output);
+static void neigh_managed_work(struct work_struct *work)
+{
+ struct neigh_table *tbl = container_of(work, struct neigh_table,
+ managed_work.work);
+ struct neighbour *neigh;
+
+ write_lock_bh(&tbl->lock);
+ list_for_each_entry(neigh, &tbl->managed_list, managed_list)
+ neigh_event_send(neigh, NULL);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
+ NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
+ write_unlock_bh(&tbl->lock);
+}
+
static void neigh_proxy_process(struct timer_list *t)
{
struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
@@ -1679,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
INIT_LIST_HEAD(&tbl->parms_list);
INIT_LIST_HEAD(&tbl->gc_list);
+ INIT_LIST_HEAD(&tbl->managed_list);
+
list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
refcount_set(&tbl->parms.refcnt, 1);
@@ -1710,9 +1757,13 @@ void neigh_table_init(int index, struct neigh_table *tbl)
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
rwlock_init(&tbl->lock);
+
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
tbl->parms.reachable_time);
+ INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
+
timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
skb_queue_head_init_class(&tbl->proxy_queue,
&neigh_table_proxy_queue_class);
@@ -1783,6 +1834,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
[NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
[NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
};
@@ -1855,7 +1907,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
@@ -1864,6 +1916,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct neighbour *neigh;
void *dst, *lladdr;
u8 protocol = 0;
+ u32 ndm_flags;
int err;
ASSERT_RTNL();
@@ -1879,6 +1932,15 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
}
ndm = nlmsg_data(nlh);
+ ndm_flags = ndm->ndm_flags;
+ if (tb[NDA_FLAGS_EXT]) {
+ u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
+
+ BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
+ (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
+ hweight32(NTF_EXT_MASK)));
+ ndm_flags |= (ext << NTF_EXT_SHIFT);
+ }
if (ndm->ndm_ifindex) {
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
@@ -1906,14 +1968,18 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[NDA_PROTOCOL])
protocol = nla_get_u8(tb[NDA_PROTOCOL]);
-
- if (ndm->ndm_flags & NTF_PROXY) {
+ if (ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
+ if (ndm_flags & NTF_MANAGED) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
+ goto out;
+ }
+
err = -ENOBUFS;
pn = pneigh_lookup(tbl, net, dst, dev, 1);
if (pn) {
- pn->flags = ndm->ndm_flags;
+ pn->flags = ndm_flags;
if (protocol)
pn->protocol = protocol;
err = 0;
@@ -1933,16 +1999,24 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
- bool exempt_from_gc;
+ bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
+ bool exempt_from_gc = ndm_permanent ||
+ ndm_flags & NTF_EXT_LEARNED;
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
goto out;
}
+ if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
+ err = -EINVAL;
+ goto out;
+ }
- exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
- ndm->ndm_flags & NTF_EXT_LEARNED;
- neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
+ neigh = ___neigh_create(tbl, dst, dev,
+ ndm_flags &
+ (NTF_EXT_LEARNED | NTF_MANAGED),
+ exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
goto out;
@@ -1961,22 +2035,22 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (protocol)
neigh->protocol = protocol;
-
- if (ndm->ndm_flags & NTF_EXT_LEARNED)
+ if (ndm_flags & NTF_EXT_LEARNED)
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
-
- if (ndm->ndm_flags & NTF_ROUTER)
+ if (ndm_flags & NTF_ROUTER)
flags |= NEIGH_UPDATE_F_ISROUTER;
+ if (ndm_flags & NTF_MANAGED)
+ flags |= NEIGH_UPDATE_F_MANAGED;
+ if (ndm_flags & NTF_USE)
+ flags |= NEIGH_UPDATE_F_USE;
- if (ndm->ndm_flags & NTF_USE) {
+ err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid, extack);
+ if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
neigh_event_send(neigh, NULL);
err = 0;
- } else
- err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
- NETLINK_CB(skb).portid, extack);
-
+ }
neigh_release(neigh);
-
out:
return err;
}
@@ -2427,6 +2501,7 @@ out:
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
u32 pid, u32 seq, int type, unsigned int flags)
{
+ u32 neigh_flags, neigh_flags_ext;
unsigned long now = jiffies;
struct nda_cacheinfo ci;
struct nlmsghdr *nlh;
@@ -2436,11 +2511,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
+ neigh_flags = neigh->flags & NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = neigh->ops->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = neigh->flags;
+ ndm->ndm_flags = neigh_flags;
ndm->ndm_type = neigh->type;
ndm->ndm_ifindex = neigh->dev->ifindex;
@@ -2471,6 +2549,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2484,6 +2564,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
u32 pid, u32 seq, int type, unsigned int flags,
struct neigh_table *tbl)
{
+ u32 neigh_flags, neigh_flags_ext;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
@@ -2491,11 +2572,14 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
+ neigh_flags = pn->flags & NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = tbl->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = pn->flags | NTF_PROXY;
+ ndm->ndm_flags = neigh_flags | NTF_PROXY;
ndm->ndm_type = RTN_UNICAST;
ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
@@ -2505,6 +2589,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2820,6 +2906,7 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(struct nda_cacheinfo))
+ nla_total_size(4) /* NDA_PROBES */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ nla_total_size(1); /* NDA_PROTOCOL */
}
@@ -2848,6 +2935,7 @@ static inline size_t pneigh_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
+ nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ nla_total_size(1); /* NDA_PROTOCOL */
}
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index eab5fc88a002..d8b9dbabd4a4 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu "
- "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n",
+ seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+ "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
@@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
static int dev_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Interface| Receive "
- " | Transmit\n"
- " | bytes packets errs drop fifo frame "
- "compressed multicast| bytes packets errs "
- " drop fifo colls carrier compressed\n");
+ seq_puts(seq, "Inter-| Receive "
+ " | Transmit\n"
+ " face |bytes packets errs drop fifo frame "
+ "compressed multicast|bytes packets errs "
+ "drop fifo colls carrier compressed\n");
else
dev_seq_printf_stats(seq, v);
return 0;
@@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
struct packet_type *pt = v;
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Type Device Function\n");
+ seq_puts(seq, "Type Device Function\n");
else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-9s %ps\n",
+ seq_printf(seq, " %-8s %ps\n",
pt->dev ? pt->dev->name : "", pt->func);
}
@@ -327,14 +327,12 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct netdev_hw_addr *ha;
struct net_device *dev = v;
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n");
+ if (v == SEQ_START_TOKEN)
return 0;
- }
netif_addr_lock_bh(dev);
netdev_for_each_mc_addr(ha, dev) {
- seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n",
+ seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
dev->ifindex, dev->name,
ha->refcount, ha->global_use,
(int)dev->addr_len, ha->addr);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f6197774048b..9c01c642cf9e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -175,6 +175,14 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier)
static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
+ struct net_device *netdev = to_net_dev(dev);
+
+ /* The check is also done in change_carrier; this helps returning early
+ * without hitting the trylock/restart in netdev_store.
+ */
+ if (!netdev->netdev_ops->ndo_change_carrier)
+ return -EOPNOTSUPP;
+
return netdev_store(dev, attr, buf, len, change_carrier);
}
@@ -196,6 +204,12 @@ static ssize_t speed_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
+ /* The check is also done in __ethtool_get_link_ksettings; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->ethtool_ops->get_link_ksettings)
+ return ret;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -216,6 +230,12 @@ static ssize_t duplex_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
+ /* The check is also done in __ethtool_get_link_ksettings; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->ethtool_ops->get_link_ksettings)
+ return ret;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -468,6 +488,14 @@ static ssize_t proto_down_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
+ struct net_device *netdev = to_net_dev(dev);
+
+ /* The check is also done in change_proto_down; this helps returning
+ * early without hitting the trylock/restart in netdev_store.
+ */
+ if (!netdev->netdev_ops->ndo_change_proto_down)
+ return -EOPNOTSUPP;
+
return netdev_store(dev, attr, buf, len, change_proto_down);
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);
@@ -478,6 +506,12 @@ static ssize_t phys_port_id_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The check is also done in dev_get_phys_port_id; this helps returning
+ * early without hitting the trylock/restart below.
+ */
+ if (!netdev->netdev_ops->ndo_get_phys_port_id)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -500,6 +534,13 @@ static ssize_t phys_port_name_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The checks are also done in dev_get_phys_port_name; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->netdev_ops->ndo_get_phys_port_name &&
+ !netdev->netdev_ops->ndo_get_devlink_port)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -522,6 +563,14 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The checks are also done in dev_get_phys_port_name; this helps
+ * returning early without hitting the trylock/restart below. This works
+ * because recurse is false when calling dev_get_port_parent_id.
+ */
+ if (!netdev->netdev_ops->ndo_get_port_parent_id &&
+ !netdev->netdev_ops->ndo_get_devlink_port)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -1226,6 +1275,12 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
+ /* The check is also done later; this helps returning early without
+ * hitting the trylock/restart below.
+ */
+ if (!dev->netdev_ops->ndo_set_tx_maxrate)
+ return -EOPNOTSUPP;
+
err = kstrtou32(buf, 10, &rate);
if (err < 0)
return err;
@@ -1869,7 +1924,7 @@ static struct class net_class __ro_after_init = {
.get_ownership = net_get_ownership,
};
-#ifdef CONFIG_OF_NET
+#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
for (; dev; dev = dev->parent) {
@@ -1973,9 +2028,9 @@ int netdev_register_kobject(struct net_device *ndev)
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
const struct net *net_new)
{
+ kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
+ kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
struct device *dev = &ndev->dev;
- kuid_t old_uid, new_uid;
- kgid_t old_gid, new_gid;
int error;
net_ns_get_ownership(net_old, &old_uid, &old_gid);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a448a9b5bb2d..202fa5eacd0f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -473,7 +473,9 @@ struct net *copy_net_ns(unsigned long flags,
if (rv < 0) {
put_userns:
+#ifdef CONFIG_KEYS
key_remove_domain(net->key_domain);
+#endif
put_user_ns(user_ns);
net_free(net);
dec_ucounts:
@@ -605,7 +607,9 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
dec_net_namespaces(net->ucounts);
+#ifdef CONFIG_KEYS
key_remove_domain(net->key_domain);
+#endif
put_user_ns(net->user_ns);
net_free(net);
}
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index b49c57d35a88..1a6a86693b74 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file);
- if (sock) {
- spin_lock(&cgroup_sk_update_lock);
+ if (sock)
sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
- spin_unlock(&cgroup_sk_update_lock);
- }
if (--ctx->batch == 0) {
ctx->batch = UPDATE_CLASSID_BATCH;
return n + 1;
@@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
struct css_task_iter it;
struct task_struct *p;
- cgroup_sk_alloc_disable();
-
cs->classid = (u32)value;
css_task_iter_start(css, 0, &it);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 99a431c56f23..8456dfbe2eb4 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
if (!dev)
return -ENODEV;
- cgroup_sk_alloc_disable();
-
rtnl_lock();
ret = netprio_set_prio(of_css(of), dev, prio);
@@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
static int update_netprio(const void *v, struct file *file, unsigned n)
{
struct socket *sock = sock_from_file(file);
- if (sock) {
- spin_lock(&cgroup_sk_update_lock);
+
+ if (sock)
sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
(unsigned long)v);
- spin_unlock(&cgroup_sk_update_lock);
- }
return 0;
}
@@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset)
struct task_struct *p;
struct cgroup_subsys_state *css;
- cgroup_sk_alloc_disable();
-
cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->id;
diff --git a/net/core/of_net.c b/net/core/of_net.c
new file mode 100644
index 000000000000..f1a9bf7578e7
--- /dev/null
+++ b/net/core/of_net.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OF helpers for network devices.
+ *
+ * Initially copied out of arch/powerpc/kernel/prom_parse.c
+ */
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/phy.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/nvmem-consumer.h>
+
+/**
+ * of_get_phy_mode - Get phy mode for given device_node
+ * @np: Pointer to the given device_node
+ * @interface: Pointer to the result
+ *
+ * The function gets phy interface string from property 'phy-mode' or
+ * 'phy-connection-type'. The index in phy_modes table is set in
+ * interface and 0 returned. In case of error interface is set to
+ * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV.
+ */
+int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
+{
+ const char *pm;
+ int err, i;
+
+ *interface = PHY_INTERFACE_MODE_NA;
+
+ err = of_property_read_string(np, "phy-mode", &pm);
+ if (err < 0)
+ err = of_property_read_string(np, "phy-connection-type", &pm);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++)
+ if (!strcasecmp(pm, phy_modes(i))) {
+ *interface = i;
+ return 0;
+ }
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(of_get_phy_mode);
+
+static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
+{
+ struct property *pp = of_find_property(np, name, NULL);
+
+ if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) {
+ memcpy(addr, pp->value, ETH_ALEN);
+ return 0;
+ }
+ return -ENODEV;
+}
+
+static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
+{
+ struct platform_device *pdev = of_find_device_by_node(np);
+ struct nvmem_cell *cell;
+ const void *mac;
+ size_t len;
+ int ret;
+
+ /* Try lookup by device first, there might be a nvmem_cell_lookup
+ * associated with a given device.
+ */
+ if (pdev) {
+ ret = nvmem_get_mac_address(&pdev->dev, addr);
+ put_device(&pdev->dev);
+ return ret;
+ }
+
+ cell = of_nvmem_cell_get(np, "mac-address");
+ if (IS_ERR(cell))
+ return PTR_ERR(cell);
+
+ mac = nvmem_cell_read(cell, &len);
+ nvmem_cell_put(cell);
+
+ if (IS_ERR(mac))
+ return PTR_ERR(mac);
+
+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+ kfree(mac);
+ return -EINVAL;
+ }
+
+ memcpy(addr, mac, ETH_ALEN);
+ kfree(mac);
+
+ return 0;
+}
+
+/**
+ * of_get_mac_address()
+ * @np: Caller's Device Node
+ * @addr: Pointer to a six-byte array for the result
+ *
+ * Search the device tree for the best MAC address to use. 'mac-address' is
+ * checked first, because that is supposed to contain to "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address. If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree. If any
+ * of the above isn't set, then try to get MAC address from nvmem cell named
+ * 'mac-address'.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the device tree, but were not set by U-Boot. For example, the
+ * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
+ * addresses. Some older U-Boots only initialized 'local-mac-address'. In
+ * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
+ * but is all zeros.
+ *
+ * Return: 0 on success and errno in case of error.
+*/
+int of_get_mac_address(struct device_node *np, u8 *addr)
+{
+ int ret;
+
+ if (!np)
+ return -ENODEV;
+
+ ret = of_get_mac_addr(np, "mac-address", addr);
+ if (!ret)
+ return 0;
+
+ ret = of_get_mac_addr(np, "local-mac-address", addr);
+ if (!ret)
+ return 0;
+
+ ret = of_get_mac_addr(np, "address", addr);
+ if (!ret)
+ return 0;
+
+ return of_get_mac_addr_nvmem(np, addr);
+}
+EXPORT_SYMBOL(of_get_mac_address);
+
+/**
+ * of_get_ethdev_address()
+ * @np: Caller's Device Node
+ * @dev: Pointer to netdevice which address will be updated
+ *
+ * Search the device tree for the best MAC address to use.
+ * If found set @dev->dev_addr to that address.
+ *
+ * See documentation of of_get_mac_address() for more information on how
+ * the best address is determined.
+ *
+ * Return: 0 on success and errno in case of error.
+ */
+int of_get_ethdev_address(struct device_node *np, struct net_device *dev)
+{
+ u8 addr[ETH_ALEN];
+ int ret;
+
+ ret = of_get_mac_address(np, addr);
+ if (!ret)
+ eth_hw_addr_set(dev, addr);
+ return ret;
+}
+EXPORT_SYMBOL(of_get_ethdev_address);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 1a6978427d6c..9b60e4301a44 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -49,6 +49,12 @@ static int page_pool_init(struct page_pool *pool,
* which is the XDP_TX use-case.
*/
if (pool->p.flags & PP_FLAG_DMA_MAP) {
+ /* DMA-mapping is not supported on 32-bit systems with
+ * 64-bit DMA mapping.
+ */
+ if (sizeof(dma_addr_t) > sizeof(unsigned long))
+ return -EOPNOTSUPP;
+
if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
return -EINVAL;
@@ -69,10 +75,6 @@ static int page_pool_init(struct page_pool *pool,
*/
}
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
- pool->p.flags & PP_FLAG_PAGE_FRAG)
- return -EINVAL;
-
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 972c8cb303a5..2af8aeeadadf 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -301,7 +301,7 @@ int rtnl_unregister(int protocol, int msgtype)
}
link = rtnl_dereference(tab[msgindex]);
- rcu_assign_pointer(tab[msgindex], NULL);
+ RCU_INIT_POINTER(tab[msgindex], NULL);
rtnl_unlock();
kfree_rcu(link, rcu);
@@ -337,7 +337,7 @@ void rtnl_unregister_all(int protocol)
if (!link)
continue;
- rcu_assign_pointer(tab[msgindex], NULL);
+ RCU_INIT_POINTER(tab[msgindex], NULL);
kfree_rcu(link, rcu);
}
rtnl_unlock();
@@ -3204,8 +3204,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
dev->mtu = mtu;
}
if (tb[IFLA_ADDRESS]) {
- memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
- nla_len(tb[IFLA_ADDRESS]));
+ __dev_addr_set(dev, nla_data(tb[IFLA_ADDRESS]),
+ nla_len(tb[IFLA_ADDRESS]));
dev->addr_assign_type = NET_ADDR_SET;
}
if (tb[IFLA_BROADCAST])
@@ -3804,9 +3804,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
- size_t if_info_size;
- skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
+ skb = nlmsg_new(if_nlmsg_size(dev, 0), flags);
if (skb == NULL)
goto errout;
@@ -4384,7 +4383,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (br_dev != netdev_master_upper_dev_get(dev) &&
- !(dev->priv_flags & IFF_EBRIDGE))
+ !netif_is_bridge_master(dev))
continue;
cops = ops;
}
@@ -5262,7 +5261,7 @@ nla_put_failure:
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
- size_t size = 0;
+ size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
diff --git a/net/core/selftests.c b/net/core/selftests.c
index 9077fa969892..acb1ee97bbd3 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -15,8 +15,8 @@
#include <net/udp.h>
struct net_packet_attrs {
- unsigned char *src;
- unsigned char *dst;
+ const unsigned char *src;
+ const unsigned char *dst;
u32 ip_src;
u32 ip_dst;
bool tcp;
@@ -173,8 +173,8 @@ static int net_test_loopback_validate(struct sk_buff *skb,
struct net_device *orig_ndev)
{
struct net_test_priv *tpriv = pt->af_packet_priv;
- unsigned char *src = tpriv->packet->src;
- unsigned char *dst = tpriv->packet->dst;
+ const unsigned char *src = tpriv->packet->src;
+ const unsigned char *dst = tpriv->packet->dst;
struct netsfhdr *shdr;
struct ethhdr *ehdr;
struct udphdr *uhdr;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2170bea2c7de..ba2f38246f07 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/mpls.h>
#include <net/mptcp.h>
+#include <net/mctp.h>
#include <net/page_pool.h>
#include <linux/uaccess.h>
@@ -80,6 +81,7 @@
#include <linux/indirect_call_wrapper.h>
#include "datagram.h"
+#include "sock_destructor.h"
struct kmem_cache *skbuff_head_cache __ro_after_init;
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
@@ -134,34 +136,31 @@ struct napi_alloc_cache {
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
-static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
+void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask);
-}
-
-void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
-{
fragsz = SKB_DATA_ALIGN(fragsz);
- return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
+ return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
}
EXPORT_SYMBOL(__napi_alloc_frag_align);
void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
- struct page_frag_cache *nc;
void *data;
fragsz = SKB_DATA_ALIGN(fragsz);
if (in_hardirq() || irqs_disabled()) {
- nc = this_cpu_ptr(&netdev_alloc_cache);
+ struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
+
data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
} else {
+ struct napi_alloc_cache *nc;
+
local_bh_disable();
- data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
+ nc = this_cpu_ptr(&napi_alloc_cache);
+ data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
local_bh_enable();
}
return data;
@@ -397,8 +396,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
{
struct kmem_cache *cache;
struct sk_buff *skb;
- u8 *data;
+ unsigned int osize;
bool pfmemalloc;
+ u8 *data;
cache = (flags & SKB_ALLOC_FCLONE)
? skbuff_fclone_cache : skbuff_head_cache;
@@ -430,7 +430,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
- size = SKB_WITH_OVERHEAD(ksize(data));
+ osize = ksize(data);
+ size = SKB_WITH_OVERHEAD(osize);
prefetchw(data + size);
/*
@@ -439,7 +440,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- __build_skb_around(skb, data, 0);
+ __build_skb_around(skb, data, osize);
skb->pfmemalloc = pfmemalloc;
if (flags & SKB_ALLOC_FCLONE) {
@@ -1804,30 +1805,39 @@ EXPORT_SYMBOL(skb_realloc_headroom);
struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
{
int delta = headroom - skb_headroom(skb);
+ int osize = skb_end_offset(skb);
+ struct sock *sk = skb->sk;
if (WARN_ONCE(delta <= 0,
"%s is expecting an increase in the headroom", __func__))
return skb;
- /* pskb_expand_head() might crash, if skb is shared */
- if (skb_shared(skb)) {
+ delta = SKB_DATA_ALIGN(delta);
+ /* pskb_expand_head() might crash, if skb is shared. */
+ if (skb_shared(skb) || !is_skb_wmem(skb)) {
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
- if (likely(nskb)) {
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
- consume_skb(skb);
- } else {
- kfree_skb(skb);
- }
+ if (unlikely(!nskb))
+ goto fail;
+
+ if (sk)
+ skb_set_owner_w(nskb, sk);
+ consume_skb(skb);
skb = nskb;
}
- if (skb &&
- pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(skb);
- skb = NULL;
+ if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC))
+ goto fail;
+
+ if (sk && is_skb_wmem(skb)) {
+ delta = skb_end_offset(skb) - osize;
+ refcount_add(delta, &sk->sk_wmem_alloc);
+ skb->truesize += delta;
}
return skb;
+
+fail:
+ kfree_skb(skb);
+ return NULL;
}
EXPORT_SYMBOL(skb_expand_head);
@@ -3423,8 +3433,9 @@ static inline void skb_split_no_header(struct sk_buff *skb,
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
int pos = skb_headlen(skb);
+ const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
- skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+ skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
skb_zerocopy_clone(skb1, skb, 0);
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
@@ -3439,19 +3450,7 @@ EXPORT_SYMBOL(skb_split);
*/
static int skb_prepare_for_shift(struct sk_buff *skb)
{
- int ret = 0;
-
- if (skb_cloned(skb)) {
- /* Save and restore truesize: pskb_expand_head() may reallocate
- * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
- * cannot change truesize at this point.
- */
- unsigned int save_truesize = skb->truesize;
-
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- skb->truesize = save_truesize;
- }
- return ret;
+ return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
}
/**
@@ -4431,6 +4430,9 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_MPTCP)
[SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
#endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+ [SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4448,6 +4450,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
#if IS_ENABLED(CONFIG_MPTCP)
skb_ext_type_len[SKB_EXT_MPTCP] +
#endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+ skb_ext_type_len[SKB_EXT_MCTP] +
+#endif
0;
}
@@ -6520,6 +6525,14 @@ static void skb_ext_put_sp(struct sec_path *sp)
}
#endif
+#ifdef CONFIG_MCTP_FLOWS
+static void skb_ext_put_mctp(struct mctp_flow *flow)
+{
+ if (flow->key)
+ mctp_key_unref(flow->key);
+}
+#endif
+
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
{
struct skb_ext *ext = skb->extensions;
@@ -6555,6 +6568,10 @@ free_now:
if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
#endif
+#ifdef CONFIG_MCTP_FLOWS
+ if (__skb_ext_exist(ext, SKB_EXT_MCTP))
+ skb_ext_put_mctp(skb_ext_get_ptr(ext, SKB_EXT_MCTP));
+#endif
kmem_cache_free(skbuff_ext_cache, ext);
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..1ae52ac943f6 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+bool sk_msg_is_readable(struct sock *sk)
+{
+ struct sk_psock *psock;
+ bool empty = true;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (likely(psock))
+ empty = list_empty(&psock->ingress_msg);
+ rcu_read_unlock();
+ return !empty;
+}
+EXPORT_SYMBOL_GPL(sk_msg_is_readable);
+
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
@@ -494,6 +508,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+ u32 off, u32 len,
struct sk_psock *psock,
struct sock *sk,
struct sk_msg *msg)
@@ -507,11 +522,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
*/
if (skb_linearize(skb))
return -EAGAIN;
- num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+ num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
if (unlikely(num_sge < 0))
return num_sge;
- copied = skb->len;
+ copied = len;
msg->sg.start = 0;
msg->sg.size = copied;
msg->sg.end = num_sge;
@@ -522,9 +537,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
return copied;
}
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+ u32 off, u32 len);
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
+ u32 off, u32 len)
{
struct sock *sk = psock->sk;
struct sk_msg *msg;
@@ -535,7 +552,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* correctly.
*/
if (unlikely(skb->sk == sk))
- return sk_psock_skb_ingress_self(psock, skb);
+ return sk_psock_skb_ingress_self(psock, skb, off, len);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;
@@ -547,7 +564,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
kfree(msg);
return err;
@@ -557,7 +574,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* skb. In this case we do not need to check memory limits or skb_set_owner_r
* because the skb is already accounted for here.
*/
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+ u32 off, u32 len)
{
struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
struct sock *sk = psock->sk;
@@ -567,7 +585,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
return -EAGAIN;
sk_msg_init(msg);
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
kfree(msg);
return err;
@@ -581,7 +599,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- return sk_psock_skb_ingress(psock, skb);
+ return sk_psock_skb_ingress(psock, skb, off, len);
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -624,6 +642,12 @@ static void sk_psock_backlog(struct work_struct *work)
while ((skb = skb_dequeue(&psock->ingress_skb))) {
len = skb->len;
off = 0;
+ if (skb_bpf_strparser(skb)) {
+ struct strp_msg *stm = strp_msg(skb);
+
+ off = stm->offset;
+ len = stm->full_len;
+ }
start:
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
@@ -863,6 +887,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
* return code, but then didn't set a redirect interface.
*/
if (unlikely(!sk_other)) {
+ skb_bpf_redirect_clear(skb);
sock_drop(from->sk, skb);
return -EIO;
}
@@ -930,6 +955,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
{
struct sock *sk_other;
int err = 0;
+ u32 len, off;
switch (verdict) {
case __SK_PASS:
@@ -937,6 +963,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+ skb_bpf_redirect_clear(skb);
goto out_free;
}
@@ -949,7 +976,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
* retrying later from workqueue.
*/
if (skb_queue_empty(&psock->ingress_skb)) {
- err = sk_psock_skb_ingress_self(psock, skb);
+ len = skb->len;
+ off = 0;
+ if (skb_bpf_strparser(skb)) {
+ struct strp_msg *stm = strp_msg(skb);
+
+ off = stm->offset;
+ len = stm->full_len;
+ }
+ err = sk_psock_skb_ingress_self(psock, skb, off, len);
}
if (err < 0) {
spin_lock_bh(&psock->ingress_lock);
@@ -1015,6 +1050,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
+ if (ret == SK_PASS)
+ skb_bpf_set_strparser(skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 62627e868e03..8f2b2f2c0e7b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -350,7 +350,7 @@ void sk_error_report(struct sock *sk)
}
EXPORT_SYMBOL(sk_error_report);
-static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
+int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
struct __kernel_sock_timeval tv;
@@ -379,12 +379,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
*(struct __kernel_sock_timeval *)optval = tv;
return sizeof(tv);
}
+EXPORT_SYMBOL(sock_get_timeout);
-static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
- bool old_timeval)
+int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
+ sockptr_t optval, int optlen, bool old_timeval)
{
- struct __kernel_sock_timeval tv;
-
if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
struct old_timeval32 tv32;
@@ -393,8 +392,8 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
return -EFAULT;
- tv.tv_sec = tv32.tv_sec;
- tv.tv_usec = tv32.tv_usec;
+ tv->tv_sec = tv32.tv_sec;
+ tv->tv_usec = tv32.tv_usec;
} else if (old_timeval) {
struct __kernel_old_timeval old_tv;
@@ -402,14 +401,28 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
return -EINVAL;
if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
return -EFAULT;
- tv.tv_sec = old_tv.tv_sec;
- tv.tv_usec = old_tv.tv_usec;
+ tv->tv_sec = old_tv.tv_sec;
+ tv->tv_usec = old_tv.tv_usec;
} else {
- if (optlen < sizeof(tv))
+ if (optlen < sizeof(*tv))
return -EINVAL;
- if (copy_from_sockptr(&tv, optval, sizeof(tv)))
+ if (copy_from_sockptr(tv, optval, sizeof(*tv)))
return -EFAULT;
}
+
+ return 0;
+}
+EXPORT_SYMBOL(sock_copy_user_timeval);
+
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+ bool old_timeval)
+{
+ struct __kernel_sock_timeval tv;
+ int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
+
+ if (err)
+ return err;
+
if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
return -EDOM;
@@ -947,6 +960,53 @@ void sock_set_mark(struct sock *sk, u32 val)
}
EXPORT_SYMBOL(sock_set_mark);
+static void sock_release_reserved_memory(struct sock *sk, int bytes)
+{
+ /* Round down bytes to multiple of pages */
+ bytes &= ~(SK_MEM_QUANTUM - 1);
+
+ WARN_ON(bytes > sk->sk_reserved_mem);
+ sk->sk_reserved_mem -= bytes;
+ sk_mem_reclaim(sk);
+}
+
+static int sock_reserve_memory(struct sock *sk, int bytes)
+{
+ long allocated;
+ bool charged;
+ int pages;
+
+ if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
+ return -EOPNOTSUPP;
+
+ if (!bytes)
+ return 0;
+
+ pages = sk_mem_pages(bytes);
+
+ /* pre-charge to memcg */
+ charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ if (!charged)
+ return -ENOMEM;
+
+ /* pre-charge to forward_alloc */
+ allocated = sk_memory_allocated_add(sk, pages);
+ /* If the system goes into memory pressure with this
+ * precharge, give up and return error.
+ */
+ if (allocated > sk_prot_mem_limits(sk, 1)) {
+ sk_memory_allocated_sub(sk, pages);
+ mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
+ return -ENOMEM;
+ }
+ sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT;
+
+ sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT;
+
+ return 0;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -1367,6 +1427,23 @@ set_sndbuf:
~SOCK_BUF_LOCK_MASK);
break;
+ case SO_RESERVE_MEM:
+ {
+ int delta;
+
+ if (val < 0) {
+ ret = -EINVAL;
+ break;
+ }
+
+ delta = val - sk->sk_reserved_mem;
+ if (delta < 0)
+ sock_release_reserved_memory(sk, -delta);
+ else
+ ret = sock_reserve_memory(sk, delta);
+ break;
+ }
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1376,6 +1453,16 @@ set_sndbuf:
}
EXPORT_SYMBOL(sock_setsockopt);
+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+ const struct cred *cred;
+
+ spin_lock(&sk->sk_peer_lock);
+ cred = get_cred(sk->sk_peer_cred);
+ spin_unlock(&sk->sk_peer_lock);
+
+ return cred;
+}
static void cred_to_ucred(struct pid *pid, const struct cred *cred,
struct ucred *ucred)
@@ -1552,7 +1639,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct ucred peercred;
if (len > sizeof(peercred))
len = sizeof(peercred);
+
+ spin_lock(&sk->sk_peer_lock);
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+ spin_unlock(&sk->sk_peer_lock);
+
if (copy_to_user(optval, &peercred, len))
return -EFAULT;
goto lenout;
@@ -1560,20 +1651,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_PEERGROUPS:
{
+ const struct cred *cred;
int ret, n;
- if (!sk->sk_peer_cred)
+ cred = sk_get_peer_cred(sk);
+ if (!cred)
return -ENODATA;
- n = sk->sk_peer_cred->group_info->ngroups;
+ n = cred->group_info->ngroups;
if (len < n * sizeof(gid_t)) {
len = n * sizeof(gid_t);
+ put_cred(cred);
return put_user(len, optlen) ? -EFAULT : -ERANGE;
}
len = n * sizeof(gid_t);
- ret = groups_to_user((gid_t __user *)optval,
- sk->sk_peer_cred->group_info);
+ ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+ put_cred(cred);
if (ret)
return ret;
goto lenout;
@@ -1733,6 +1827,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
break;
+ case SO_RESERVE_MEM:
+ v.val = sk->sk_reserved_mem;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1935,9 +2033,10 @@ static void __sk_destruct(struct rcu_head *head)
sk->sk_frag.page = NULL;
}
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+ put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
+
if (likely(sk->sk_net_refcnt))
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
@@ -2045,6 +2144,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
+ newsk->sk_reserved_mem = 0;
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
@@ -3145,6 +3245,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
+ spin_lock_init(&sk->sk_peer_lock);
+
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -3179,17 +3281,15 @@ EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
@@ -3212,42 +3312,37 @@ void release_sock(struct sock *sk)
}
EXPORT_SYMBOL(release_sock);
-/**
- * lock_sock_fast - fast version of lock_sock
- * @sk: socket
- *
- * This version should be used for very small section, where process wont block
- * return false if fast path is taken:
- *
- * sk_lock.slock locked, owned = 0, BH disabled
- *
- * return true if slow path is taken:
- *
- * sk_lock.slock unlocked, owned = 1, BH enabled
- */
-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
- if (!sk->sk_lock.owned)
+ if (!sk->sk_lock.owned) {
/*
- * Note : We must disable BH
+ * Fast path return with bottom halves disabled and
+ * sock::sk_lock.slock held.
+ *
+ * The 'mutex' is not contended and holding
+ * sock::sk_lock.slock prevents all other lockers to
+ * proceed so the corresponding unlock_sock_fast() can
+ * avoid the slow path of release_sock() completely and
+ * just release slock.
+ *
+ * From a semantical POV this is equivalent to 'acquiring'
+ * the 'mutex', hence the corresponding lockdep
+ * mutex_release() has to happen in the fast path of
+ * unlock_sock_fast().
*/
return false;
+ }
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
__acquire(&sk->sk_lock.slock);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
return true;
}
-EXPORT_SYMBOL(lock_sock_fast);
+EXPORT_SYMBOL(__lock_sock_fast);
int sock_gettstamp(struct socket *sock, void __user *userstamp,
bool timeval, bool time32)
diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h
new file mode 100644
index 000000000000..2f396e6bfba5
--- /dev/null
+++ b/net/core/sock_destructor.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_SOCK_DESTRUCTOR_H
+#define _NET_CORE_SOCK_DESTRUCTOR_H
+#include <net/tcp.h>
+
+static inline bool is_skb_wmem(const struct sk_buff *skb)
+{
+ return skb->destructor == sock_wfree ||
+ skb->destructor == __sock_wfree ||
+ (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
+}
+#endif
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index e252b8ec2b85..f39ef79ced67 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -511,12 +511,6 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
}
-static bool sk_is_tcp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_STREAM &&
- sk->sk_protocol == IPPROTO_TCP;
-}
-
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
diff --git a/net/core/stream.c b/net/core/stream.c
index 4f1d4aa5fb38..06b36c730ce8 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -195,14 +195,11 @@ void sk_stream_kill_queues(struct sock *sk)
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
- /* Next, the error queue. */
- __skb_queue_purge(&sk->sk_error_queue);
-
/* Next, the write queue. */
WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
- sk_mem_reclaim(sk);
+ sk_mem_reclaim_final(sk);
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8496c1142c9..5f88526ad61c 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0600,
.proc_handler = proc_dolongvec_minmax_bpf_restricted,
.extra1 = &long_one,
- .extra2 = &long_max,
+ .extra2 = &bpf_jit_limit_max,
},
#endif
{
diff --git a/net/core/xdp.c b/net/core/xdp.c
index cc92ccb38432..5ddc29f29bad 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -143,8 +143,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
if (xdp_rxq->reg_state == REG_STATE_UNUSED)
return;
- WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
-
xdp_rxq_info_unreg_mem_model(xdp_rxq);
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index c5c1d2b8045e..5183e627468d 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -48,7 +48,7 @@ extern bool dccp_debug;
extern struct inet_hashinfo dccp_hashinfo;
-extern struct percpu_counter dccp_orphan_count;
+DECLARE_PER_CPU(unsigned int, dccp_orphan_count);
void dccp_time_wait(struct sock *sk, int state, int timeo);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index c5c74a34d139..91e7a2202697 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -94,6 +94,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackvec = NULL;
newdp->dccps_service_list = NULL;
+ newdp->dccps_hc_rx_ccid = NULL;
+ newdp->dccps_hc_tx_ccid = NULL;
newdp->dccps_service = dreq->dreq_service;
newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index abb5c596a817..fc44dadc778b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -42,8 +42,8 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
EXPORT_SYMBOL_GPL(dccp_statistics);
-struct percpu_counter dccp_orphan_count;
-EXPORT_SYMBOL_GPL(dccp_orphan_count);
+DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
+EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);
@@ -1055,7 +1055,7 @@ adjudge_to_death:
bh_lock_sock(sk);
WARN_ON(sock_owned_by_user(sk));
- percpu_counter_inc(sk->sk_prot->orphan_count);
+ this_cpu_inc(dccp_orphan_count);
/* Have we already been destroyed by a softirq or backlog? */
if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
@@ -1115,13 +1115,10 @@ static int __init dccp_init(void)
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
sizeof_field(struct sk_buff, cb));
- rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
- if (rc)
- goto out_fail;
inet_hashinfo_init(&dccp_hashinfo);
rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
if (rc)
- goto out_free_percpu;
+ goto out_fail;
rc = -ENOBUFS;
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
@@ -1226,8 +1223,6 @@ out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
-out_free_percpu:
- percpu_counter_destroy(&dccp_orphan_count);
out_fail:
dccp_hashinfo.bhash = NULL;
dccp_hashinfo.ehash = NULL;
@@ -1250,7 +1245,6 @@ static void __exit dccp_fini(void)
dccp_ackvec_exit();
dccp_sysctl_exit();
inet_hashinfo2_free_mod(&dccp_hashinfo);
- percpu_counter_destroy(&dccp_orphan_count);
}
module_init(dccp_init);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 548285539752..8cb87b5067ee 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -92,17 +92,8 @@ config NET_DSA_TAG_KSZ
Say Y if you want to enable support for tagging frames for the
Microchip 8795/9477/9893 families of switches.
-config NET_DSA_TAG_RTL4_A
- tristate "Tag driver for Realtek 4 byte protocol A tags"
- help
- Say Y or M if you want to enable support for tagging frames for the
- Realtek switches with 4 byte protocol A tags, sich as found in
- the Realtek RTL8366RB.
-
config NET_DSA_TAG_OCELOT
tristate "Tag driver for Ocelot family of switches, using NPI port"
- depends on MSCC_OCELOT_SWITCH_LIB || \
- (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
select PACKING
help
Say Y or M if you want to enable NPI tagging for the Ocelot switches
@@ -114,8 +105,6 @@ config NET_DSA_TAG_OCELOT
config NET_DSA_TAG_OCELOT_8021Q
tristate "Tag driver for Ocelot family of switches, using VLAN"
- depends on MSCC_OCELOT_SWITCH_LIB || \
- (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
help
Say Y or M if you want to enable support for tagging frames with a
custom VLAN-based header. Frames that require timestamping, such as
@@ -130,6 +119,19 @@ config NET_DSA_TAG_QCA
Say Y or M if you want to enable support for tagging frames for
the Qualcomm Atheros QCA8K switches.
+config NET_DSA_TAG_RTL4_A
+ tristate "Tag driver for Realtek 4 byte protocol A tags"
+ help
+ Say Y or M if you want to enable support for tagging frames for the
+ Realtek switches with 4 byte protocol A tags, sich as found in
+ the Realtek RTL8366RB.
+
+config NET_DSA_TAG_RTL8_4
+ tristate "Tag driver for Realtek 8 byte protocol 4 tags"
+ help
+ Say Y or M if you want to enable support for tagging frames for Realtek
+ switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
+
config NET_DSA_TAG_LAN9303
tristate "Tag driver for SMSC/Microchip LAN9303 family of switches"
help
@@ -138,7 +140,6 @@ config NET_DSA_TAG_LAN9303
config NET_DSA_TAG_SJA1105
tristate "Tag driver for NXP SJA1105 switches"
- depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105
select PACKING
help
Say Y or M if you want to enable support for tagging frames with the
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 67ea009f242c..9f75820e7c98 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -10,12 +10,13 @@ obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o
obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
-obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o
obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
+obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
+obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 1dc45e40f961..ea5169e671ae 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -280,23 +280,22 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
}
#ifdef CONFIG_PM_SLEEP
-static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
+static bool dsa_port_is_initialized(const struct dsa_port *dp)
{
- const struct dsa_port *dp = dsa_to_port(ds, p);
-
return dp->type == DSA_PORT_TYPE_USER && dp->slave;
}
int dsa_switch_suspend(struct dsa_switch *ds)
{
- int i, ret = 0;
+ struct dsa_port *dp;
+ int ret = 0;
/* Suspend slave network devices */
- for (i = 0; i < ds->num_ports; i++) {
- if (!dsa_is_port_initialized(ds, i))
+ dsa_switch_for_each_port(dp, ds) {
+ if (!dsa_port_is_initialized(dp))
continue;
- ret = dsa_slave_suspend(dsa_to_port(ds, i)->slave);
+ ret = dsa_slave_suspend(dp->slave);
if (ret)
return ret;
}
@@ -310,7 +309,8 @@ EXPORT_SYMBOL_GPL(dsa_switch_suspend);
int dsa_switch_resume(struct dsa_switch *ds)
{
- int i, ret = 0;
+ struct dsa_port *dp;
+ int ret = 0;
if (ds->ops->resume)
ret = ds->ops->resume(ds);
@@ -319,11 +319,11 @@ int dsa_switch_resume(struct dsa_switch *ds)
return ret;
/* Resume slave network devices */
- for (i = 0; i < ds->num_ports; i++) {
- if (!dsa_is_port_initialized(ds, i))
+ dsa_switch_for_each_port(dp, ds) {
+ if (!dsa_port_is_initialized(dp))
continue;
- ret = dsa_slave_resume(dsa_to_port(ds, i)->slave);
+ ret = dsa_slave_resume(dp->slave);
if (ret)
return ret;
}
@@ -345,6 +345,11 @@ bool dsa_schedule_work(struct work_struct *work)
return queue_work(dsa_owq, work);
}
+void dsa_flush_workqueue(void)
+{
+ flush_workqueue(dsa_owq);
+}
+
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
{
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 1b2b25d7bd02..826957b6442b 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -170,7 +170,7 @@ void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num)
/* Check if the bridge is still in use, otherwise it is time
* to clean it up so we can reuse this bridge_num later.
*/
- if (!dsa_bridge_num_find(bridge_dev))
+ if (dsa_bridge_num_find(bridge_dev) < 0)
clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
}
@@ -399,11 +399,8 @@ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
if (!dsa_port_is_cpu(cpu_dp))
continue;
- list_for_each_entry(dp, &dst->ports, list) {
- /* Prefer a local CPU port */
- if (dp->ds != cpu_dp->ds)
- continue;
-
+ /* Prefer a local CPU port */
+ dsa_switch_for_each_port(dp, cpu_dp->ds) {
/* Prefer the first local CPU port found */
if (dp->cpu_dp)
continue;
@@ -429,15 +426,23 @@ static int dsa_port_setup(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
bool dsa_port_link_registered = false;
+ struct dsa_switch *ds = dp->ds;
bool dsa_port_enabled = false;
int err = 0;
if (dp->setup)
return 0;
+ mutex_init(&dp->addr_lists_lock);
INIT_LIST_HEAD(&dp->fdbs);
INIT_LIST_HEAD(&dp->mdbs);
+ if (ds->ops->port_setup) {
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
dsa_port_disable(dp);
@@ -480,8 +485,11 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_disable(dp);
if (err && dsa_port_link_registered)
dsa_port_link_unregister_of(dp);
- if (err)
+ if (err) {
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
return err;
+ }
dp->setup = true;
@@ -533,11 +541,15 @@ static int dsa_port_devlink_setup(struct dsa_port *dp)
static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a, *tmp;
if (!dp->setup)
return;
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
+
devlink_port_type_clear(dlp);
switch (dp->type) {
@@ -581,6 +593,36 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp)
dp->devlink_port_setup = false;
}
+/* Destroy the current devlink port, and create a new one which has the UNUSED
+ * flavour. At this point, any call to ds->ops->port_setup has been already
+ * balanced out by a call to ds->ops->port_teardown, so we know that any
+ * devlink port regions the driver had are now unregistered. We then call its
+ * ds->ops->port_setup again, in order for the driver to re-create them on the
+ * new devlink port.
+ */
+static int dsa_port_reinit_as_unused(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ dsa_port_devlink_teardown(dp);
+ dp->type = DSA_PORT_TYPE_UNUSED;
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ return err;
+
+ if (ds->ops->port_setup) {
+ /* On error, leave the devlink port registered,
+ * dsa_switch_teardown will clean it up later.
+ */
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int dsa_devlink_info_get(struct devlink *dl,
struct devlink_info_req *req,
struct netlink_ext_ack *extack)
@@ -758,16 +800,17 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
{
const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
struct dsa_switch_tree *dst = ds->dst;
- int port, err;
+ struct dsa_port *cpu_dp;
+ int err;
if (tag_ops->proto == dst->default_proto)
return 0;
- for (port = 0; port < ds->num_ports; port++) {
- if (!dsa_is_cpu_port(ds, port))
- continue;
-
- err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto);
+ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
+ rtnl_lock();
+ err = ds->ops->change_tag_protocol(ds, cpu_dp->index,
+ tag_ops->proto);
+ rtnl_unlock();
if (err) {
dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
tag_ops->name, ERR_PTR(err));
@@ -804,19 +847,13 @@ static int dsa_switch_setup(struct dsa_switch *ds)
dl_priv = devlink_priv(ds->devlink);
dl_priv->ds = ds;
- err = devlink_register(ds->devlink);
- if (err)
- goto free_devlink;
-
/* Setup devlink port instances now, so that the switch
* setup() can register regions etc, against the ports
*/
- list_for_each_entry(dp, &ds->dst->ports, list) {
- if (dp->ds == ds) {
- err = dsa_port_devlink_setup(dp);
- if (err)
- goto unregister_devlink_ports;
- }
+ dsa_switch_for_each_port(dp, ds) {
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ goto unregister_devlink_ports;
}
err = dsa_switch_register_notifier(ds);
@@ -833,10 +870,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
if (err)
goto teardown;
- devlink_params_publish(ds->devlink);
-
if (!ds->slave_mii_bus && ds->ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+ ds->slave_mii_bus = mdiobus_alloc();
if (!ds->slave_mii_bus) {
err = -ENOMEM;
goto teardown;
@@ -846,27 +881,26 @@ static int dsa_switch_setup(struct dsa_switch *ds)
err = mdiobus_register(ds->slave_mii_bus);
if (err < 0)
- goto teardown;
+ goto free_slave_mii_bus;
}
ds->setup = true;
-
+ devlink_register(ds->devlink);
return 0;
+free_slave_mii_bus:
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_free(ds->slave_mii_bus);
teardown:
if (ds->ops->teardown)
ds->ops->teardown(ds);
unregister_notifier:
dsa_switch_unregister_notifier(ds);
unregister_devlink_ports:
- list_for_each_entry(dp, &ds->dst->ports, list)
- if (dp->ds == ds)
- dsa_port_devlink_teardown(dp);
- devlink_unregister(ds->devlink);
-free_devlink:
+ dsa_switch_for_each_port(dp, ds)
+ dsa_port_devlink_teardown(dp);
devlink_free(ds->devlink);
ds->devlink = NULL;
-
return err;
}
@@ -877,19 +911,23 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
if (!ds->setup)
return;
- if (ds->slave_mii_bus && ds->ops->phy_read)
- mdiobus_unregister(ds->slave_mii_bus);
+ if (ds->devlink)
+ devlink_unregister(ds->devlink);
- dsa_switch_unregister_notifier(ds);
+ if (ds->slave_mii_bus && ds->ops->phy_read) {
+ mdiobus_unregister(ds->slave_mii_bus);
+ mdiobus_free(ds->slave_mii_bus);
+ ds->slave_mii_bus = NULL;
+ }
if (ds->ops->teardown)
ds->ops->teardown(ds);
+ dsa_switch_unregister_notifier(ds);
+
if (ds->devlink) {
- list_for_each_entry(dp, &ds->dst->ports, list)
- if (dp->ds == ds)
- dsa_port_devlink_teardown(dp);
- devlink_unregister(ds->devlink);
+ dsa_switch_for_each_port(dp, ds)
+ dsa_port_devlink_teardown(dp);
devlink_free(ds->devlink);
ds->devlink = NULL;
}
@@ -897,6 +935,33 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
ds->setup = false;
}
+/* First tear down the non-shared, then the shared ports. This ensures that
+ * all work items scheduled by our switchdev handlers for user ports have
+ * completed before we destroy the refcounting kept on the shared ports.
+ */
+static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_unused(dp))
+ dsa_port_teardown(dp);
+
+ dsa_flush_workqueue();
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
+ dsa_port_teardown(dp);
+}
+
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
+}
+
static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
@@ -911,38 +976,22 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
list_for_each_entry(dp, &dst->ports, list) {
err = dsa_port_setup(dp);
if (err) {
- dsa_port_devlink_teardown(dp);
- dp->type = DSA_PORT_TYPE_UNUSED;
- err = dsa_port_devlink_setup(dp);
+ err = dsa_port_reinit_as_unused(dp);
if (err)
goto teardown;
- continue;
}
}
return 0;
teardown:
- list_for_each_entry(dp, &dst->ports, list)
- dsa_port_teardown(dp);
+ dsa_tree_teardown_ports(dst);
- list_for_each_entry(dp, &dst->ports, list)
- dsa_switch_teardown(dp->ds);
+ dsa_tree_teardown_switches(dst);
return err;
}
-static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- dsa_port_teardown(dp);
-
- list_for_each_entry(dp, &dst->ports, list)
- dsa_switch_teardown(dp->ds);
-}
-
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
@@ -1034,6 +1083,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
teardown_master:
dsa_tree_teardown_master(dst);
teardown_switches:
+ dsa_tree_teardown_ports(dst);
dsa_tree_teardown_switches(dst);
teardown_cpu_ports:
dsa_tree_teardown_cpu_ports(dst);
@@ -1052,6 +1102,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_master(dst);
+ dsa_tree_teardown_ports(dst);
+
dsa_tree_teardown_switches(dst);
dsa_tree_teardown_cpu_ports(dst);
@@ -1091,7 +1143,7 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
goto out_unlock;
list_for_each_entry(dp, &dst->ports, list) {
- if (!dsa_is_user_port(dp->ds, dp->index))
+ if (!dsa_port_is_user(dp))
continue;
if (dp->slave->flags & IFF_UP)
@@ -1122,8 +1174,8 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
struct dsa_switch_tree *dst = ds->dst;
struct dsa_port *dp;
- list_for_each_entry(dp, &dst->ports, list)
- if (dp->ds == ds && dp->index == index)
+ dsa_switch_for_each_port(dp, ds)
+ if (dp->index == index)
return dp;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
@@ -1308,12 +1360,15 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
for_each_available_child_of_node(ports, port) {
err = of_property_read_u32(port, "reg", &reg);
- if (err)
+ if (err) {
+ of_node_put(port);
goto out_put_node;
+ }
if (reg >= ds->num_ports) {
dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n",
port, reg, ds->num_ports);
+ of_node_put(port);
err = -EINVAL;
goto out_put_node;
}
@@ -1321,8 +1376,10 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
dp = dsa_to_port(ds, reg);
err = dsa_port_parse_of(dp, port);
- if (err)
+ if (err) {
+ of_node_put(port);
goto out_put_node;
+ }
}
out_put_node:
@@ -1464,12 +1521,9 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
static void dsa_switch_release_ports(struct dsa_switch *ds)
{
- struct dsa_switch_tree *dst = ds->dst;
struct dsa_port *dp, *next;
- list_for_each_entry_safe(dp, next, &dst->ports, list) {
- if (dp->ds != ds)
- continue;
+ dsa_switch_for_each_port_safe(dp, next, ds) {
list_del(&dp->list);
kfree(dp);
}
@@ -1546,3 +1600,47 @@ void dsa_unregister_switch(struct dsa_switch *ds)
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_unregister_switch);
+
+/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
+ * blocking that operation from completion, due to the dev_hold taken inside
+ * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
+ * the DSA master, so that the system can reboot successfully.
+ */
+void dsa_switch_shutdown(struct dsa_switch *ds)
+{
+ struct net_device *master, *slave_dev;
+ LIST_HEAD(unregister_list);
+ struct dsa_port *dp;
+
+ mutex_lock(&dsa2_mutex);
+ rtnl_lock();
+
+ dsa_switch_for_each_user_port(dp, ds) {
+ master = dp->cpu_dp->master;
+ slave_dev = dp->slave;
+
+ netdev_upper_dev_unlink(master, slave_dev);
+ /* Just unlinking ourselves as uppers of the master is not
+ * sufficient. When the master net device unregisters, that will
+ * also call dev_close, which we will catch as NETDEV_GOING_DOWN
+ * and trigger a dev_close on our own devices (dsa_slave_close).
+ * In turn, that will call dev_mc_unsync on the master's net
+ * device. If the master is also a DSA switch port, this will
+ * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
+ * its own master. Lockdep will complain about the fact that
+ * all cascaded masters have the same dsa_master_addr_list_lock_key,
+ * which it normally would not do if the cascaded masters would
+ * be in a proper upper/lower relationship, which we've just
+ * destroyed.
+ * To suppress the lockdep warnings, let's actually unregister
+ * the DSA slave interfaces too, to avoid the nonsensical
+ * multicast address list synchronization on shutdown.
+ */
+ unregister_netdevice_queue(slave_dev, &unregister_list);
+ }
+ unregister_netdevice_many(&unregister_list);
+
+ rtnl_unlock();
+ mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 33ab7d7af9eb..a5c9bc7b66c6 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -170,6 +170,7 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops);
const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
bool dsa_schedule_work(struct work_struct *work);
+void dsa_flush_workqueue(void);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 616330a16d31..f6f12ad2b525 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -380,6 +380,8 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
switchdev_bridge_port_unoffload(brport_dev, dp,
&dsa_slave_switchdev_notifier,
&dsa_slave_switchdev_blocking_notifier);
+
+ dsa_flush_workqueue();
}
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
@@ -515,14 +517,15 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
- int err, i;
+ struct dsa_port *other_dp;
+ int err;
/* VLAN awareness was off, so the question is "can we turn it on".
* We may have had 8021q uppers, those need to go. Make sure we don't
* enter an inconsistent state: deny changing the VLAN awareness state
* as long as we have 8021q uppers.
*/
- if (vlan_filtering && dsa_is_user_port(ds, dp->index)) {
+ if (vlan_filtering && dsa_port_is_user(dp)) {
struct net_device *upper_dev, *slave = dp->slave;
struct net_device *br = dp->bridge_dev;
struct list_head *iter;
@@ -557,10 +560,10 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
* different ports of the same switch device and one of them has a
* different setting than what is being requested.
*/
- for (i = 0; i < ds->num_ports; i++) {
+ dsa_switch_for_each_port(other_dp, ds) {
struct net_device *other_bridge;
- other_bridge = dsa_to_port(ds, i)->bridge_dev;
+ other_bridge = other_dp->bridge_dev;
if (!other_bridge)
continue;
/* If it's the same bridge, it also has same
@@ -607,20 +610,16 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
return err;
if (ds->vlan_filtering_is_global) {
- int port;
+ struct dsa_port *other_dp;
ds->vlan_filtering = vlan_filtering;
- for (port = 0; port < ds->num_ports; port++) {
- struct net_device *slave;
-
- if (!dsa_is_user_port(ds, port))
- continue;
+ dsa_switch_for_each_user_port(other_dp, ds) {
+ struct net_device *slave = dp->slave;
/* We might be called in the unbind path, so not
* all slave devices might still be registered.
*/
- slave = dsa_to_port(ds, port)->slave;
if (!slave)
continue;
@@ -1041,7 +1040,7 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
struct phy_device *phydev = NULL;
struct dsa_switch *ds = dp->ds;
- if (dsa_is_user_port(ds, dp->index))
+ if (dsa_port_is_user(dp))
phydev = dp->slave->phydev;
if (!ds->ops->phylink_mac_link_down) {
@@ -1169,6 +1168,10 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
dp->pl_config.type = PHYLINK_DEV;
dp->pl_config.pcs_poll = ds->pcs_poll;
+ if (ds->ops->phylink_get_interfaces)
+ ds->ops->phylink_get_interfaces(ds, dp->index,
+ dp->pl_config.supported_interfaces);
+
dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn),
mode, &dsa_port_phylink_mac_ops);
if (IS_ERR(dp->pl)) {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 662ff531d4e2..ad61f6bc8886 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -174,7 +174,7 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
dev_uc_del(master, dev->dev_addr);
out:
- ether_addr_copy(dev->dev_addr, addr->sa_data);
+ eth_hw_addr_set(dev, addr->sa_data);
return 0;
}
@@ -789,6 +789,37 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
return -EOPNOTSUPP;
}
+static void dsa_slave_get_eth_phy_stats(struct net_device *dev,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_eth_phy_stats)
+ ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats);
+}
+
+static void dsa_slave_get_eth_mac_stats(struct net_device *dev,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_eth_mac_stats)
+ ds->ops->get_eth_mac_stats(ds, dp->index, mac_stats);
+}
+
+static void
+dsa_slave_get_eth_ctrl_stats(struct net_device *dev,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_eth_ctrl_stats)
+ ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats);
+}
+
static void dsa_slave_net_selftest(struct net_device *ndev,
struct ethtool_test *etest, u64 *buf)
{
@@ -1695,6 +1726,9 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_strings = dsa_slave_get_strings,
.get_ethtool_stats = dsa_slave_get_ethtool_stats,
.get_sset_count = dsa_slave_get_sset_count,
+ .get_eth_phy_stats = dsa_slave_get_eth_phy_stats,
+ .get_eth_mac_stats = dsa_slave_get_eth_mac_stats,
+ .get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats,
.set_wol = dsa_slave_set_wol,
.get_wol = dsa_slave_get_wol,
.set_eee = dsa_slave_set_eee,
@@ -1837,6 +1871,10 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
dp->pl_config.poll_fixed_state = true;
}
+ if (ds->ops->phylink_get_interfaces)
+ ds->ops->phylink_get_interfaces(ds, dp->index,
+ dp->pl_config.supported_interfaces);
+
dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), mode,
&dsa_port_phylink_mac_ops);
if (IS_ERR(dp->pl)) {
@@ -1854,13 +1892,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
* use the switch internal MDIO bus instead
*/
ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags);
- if (ret) {
- netdev_err(slave_dev,
- "failed to connect to port %d: %d\n",
- dp->index, ret);
- phylink_destroy(dp->pl);
- return ret;
- }
+ }
+ if (ret) {
+ netdev_err(slave_dev, "failed to connect to PHY: %pe\n",
+ ERR_PTR(ret));
+ phylink_destroy(dp->pl);
}
return ret;
@@ -1956,7 +1992,7 @@ int dsa_slave_create(struct dsa_port *port)
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
if (!is_zero_ether_addr(port->mac))
- ether_addr_copy(slave_dev->dev_addr, port->mac);
+ eth_hw_addr_set(slave_dev, port->mac);
else
eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
@@ -2336,7 +2372,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
dst = cpu_dp->ds->dst;
list_for_each_entry(dp, &dst->ports, list) {
- if (!dsa_is_user_port(dp->ds, dp->index))
+ if (!dsa_port_is_user(dp))
continue;
list_add(&dp->slave->close_list, &close_list);
@@ -2381,7 +2417,6 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
dp = dsa_to_port(ds, switchdev_work->port);
- rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
if (switchdev_work->host_addr)
@@ -2416,9 +2451,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
break;
}
- rtnl_unlock();
- dev_put(switchdev_work->dev);
kfree(switchdev_work);
}
@@ -2439,10 +2472,9 @@ static bool dsa_foreign_dev_check(const struct net_device *dev,
}
static int dsa_slave_fdb_event(struct net_device *dev,
- const struct net_device *orig_dev,
- const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info,
- unsigned long event)
+ struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info)
{
struct dsa_switchdev_event_work *switchdev_work;
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -2491,31 +2523,11 @@ static int dsa_slave_fdb_event(struct net_device *dev,
switchdev_work->vid = fdb_info->vid;
switchdev_work->host_addr = host_addr;
- /* Hold a reference for dsa_fdb_offload_notify */
- dev_hold(dev);
dsa_schedule_work(&switchdev_work->work);
return 0;
}
-static int
-dsa_slave_fdb_add_to_device(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info)
-{
- return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
- SWITCHDEV_FDB_ADD_TO_DEVICE);
-}
-
-static int
-dsa_slave_fdb_del_to_device(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info)
-{
- return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
- SWITCHDEV_FDB_DEL_TO_DEVICE);
-}
-
/* Called under rcu_read_lock() */
static int dsa_slave_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
@@ -2530,18 +2542,12 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
dsa_slave_port_attr_set);
return notifier_from_errno(err);
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- err = switchdev_handle_fdb_add_to_device(dev, ptr,
- dsa_slave_dev_check,
- dsa_foreign_dev_check,
- dsa_slave_fdb_add_to_device,
- NULL);
- return notifier_from_errno(err);
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- err = switchdev_handle_fdb_del_to_device(dev, ptr,
- dsa_slave_dev_check,
- dsa_foreign_dev_check,
- dsa_slave_fdb_del_to_device,
- NULL);
+ err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_fdb_event,
+ NULL);
return notifier_from_errno(err);
default:
return NOTIFY_DONE;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 1c797ec8e2c2..bb155a16d454 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -17,14 +17,11 @@
static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
unsigned int ageing_time)
{
- int i;
-
- for (i = 0; i < ds->num_ports; ++i) {
- struct dsa_port *dp = dsa_to_port(ds, i);
+ struct dsa_port *dp;
+ dsa_switch_for_each_port(dp, ds)
if (dp->ageing_time && dp->ageing_time < ageing_time)
ageing_time = dp->ageing_time;
- }
return ageing_time;
}
@@ -49,10 +46,10 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds,
return 0;
}
-static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_mtu_info *info)
+static bool dsa_port_mtu_match(struct dsa_port *dp,
+ struct dsa_notifier_mtu_info *info)
{
- if (ds->index == info->sw_index && port == info->port)
+ if (dp->ds->index == info->sw_index && dp->index == info->port)
return true;
/* Do not propagate to other switches in the tree if the notifier was
@@ -61,7 +58,7 @@ static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port,
if (info->targeted_match)
return false;
- if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
return true;
return false;
@@ -70,14 +67,16 @@ static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port,
static int dsa_switch_mtu(struct dsa_switch *ds,
struct dsa_notifier_mtu_info *info)
{
- int port, ret;
+ struct dsa_port *dp;
+ int ret;
if (!ds->ops->port_change_mtu)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_mtu_match(ds, port, info)) {
- ret = ds->ops->port_change_mtu(ds, port, info->mtu);
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_mtu_match(dp, info)) {
+ ret = ds->ops->port_change_mtu(ds, dp->index,
+ info->mtu);
if (ret)
return ret;
}
@@ -120,7 +119,8 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
struct netlink_ext_ack extack = {0};
bool change_vlan_filtering = false;
bool vlan_filtering;
- int err, port;
+ struct dsa_port *dp;
+ int err;
if (dst->index == info->tree_index && ds->index == info->sw_index &&
ds->ops->port_bridge_leave)
@@ -150,10 +150,10 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
* VLAN-aware bridge.
*/
if (change_vlan_filtering && ds->vlan_filtering_is_global) {
- for (port = 0; port < ds->num_ports; port++) {
+ dsa_switch_for_each_port(dp, ds) {
struct net_device *bridge_dev;
- bridge_dev = dsa_to_port(ds, port)->bridge_dev;
+ bridge_dev = dp->bridge_dev;
if (bridge_dev && br_vlan_enabled(bridge_dev)) {
change_vlan_filtering = false;
@@ -168,7 +168,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (extack._msg)
dev_err(ds->dev, "port %d: %s\n", info->port,
extack._msg);
- if (err && err != EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP)
return err;
}
@@ -179,19 +179,19 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
* DSA links) that sit between the targeted port on which the notifier was
* emitted and its dedicated CPU port.
*/
-static bool dsa_switch_host_address_match(struct dsa_switch *ds, int port,
- int info_sw_index, int info_port)
+static bool dsa_port_host_address_match(struct dsa_port *dp,
+ int info_sw_index, int info_port)
{
struct dsa_port *targeted_dp, *cpu_dp;
struct dsa_switch *targeted_ds;
- targeted_ds = dsa_switch_find(ds->dst->index, info_sw_index);
+ targeted_ds = dsa_switch_find(dp->ds->dst->index, info_sw_index);
targeted_dp = dsa_to_port(targeted_ds, info_port);
cpu_dp = targeted_dp->cpu_dp;
- if (dsa_switch_is_upstream_of(ds, targeted_ds))
- return port == dsa_towards_port(ds, cpu_dp->ds->index,
- cpu_dp->index);
+ if (dsa_switch_is_upstream_of(dp->ds, targeted_ds))
+ return dp->index == dsa_towards_port(dp->ds, cpu_dp->ds->index,
+ cpu_dp->index);
return false;
}
@@ -209,31 +209,36 @@ static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list,
return NULL;
}
-static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb)
+static int dsa_port_do_mdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
{
- struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
- int err;
+ int port = dp->index;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_add(ds, port, mdb);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
if (a) {
refcount_inc(&a->refcount);
- return 0;
+ goto out;
}
a = kzalloc(sizeof(*a), GFP_KERNEL);
- if (!a)
- return -ENOMEM;
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
err = ds->ops->port_mdb_add(ds, port, mdb);
if (err) {
kfree(a);
- return err;
+ goto out;
}
ether_addr_copy(a->addr, mdb->addr);
@@ -241,64 +246,80 @@ static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port,
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->mdbs);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
-static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb)
+static int dsa_port_do_mdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
{
- struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
- int err;
+ int port = dp->index;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_del(ds, port, mdb);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
- if (!a)
- return -ENOENT;
+ if (!a) {
+ err = -ENOENT;
+ goto out;
+ }
if (!refcount_dec_and_test(&a->refcount))
- return 0;
+ goto out;
err = ds->ops->port_mdb_del(ds, port, mdb);
if (err) {
- refcount_inc(&a->refcount);
- return err;
+ refcount_set(&a->refcount, 1);
+ goto out;
}
list_del(&a->list);
kfree(a);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
-static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid)
+static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
{
- struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
- int err;
+ int port = dp->index;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_add(ds, port, addr, vid);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
if (a) {
refcount_inc(&a->refcount);
- return 0;
+ goto out;
}
a = kzalloc(sizeof(*a), GFP_KERNEL);
- if (!a)
- return -ENOMEM;
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
err = ds->ops->port_fdb_add(ds, port, addr, vid);
if (err) {
kfree(a);
- return err;
+ goto out;
}
ether_addr_copy(a->addr, addr);
@@ -306,53 +327,63 @@ static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port,
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->fdbs);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
-static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid)
+static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
{
- struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
- int err;
+ int port = dp->index;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_del(ds, port, addr, vid);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
- if (!a)
- return -ENOENT;
+ if (!a) {
+ err = -ENOENT;
+ goto out;
+ }
if (!refcount_dec_and_test(&a->refcount))
- return 0;
+ goto out;
err = ds->ops->port_fdb_del(ds, port, addr, vid);
if (err) {
- refcount_inc(&a->refcount);
- return err;
+ refcount_set(&a->refcount, 1);
+ goto out;
}
list_del(&a->list);
kfree(a);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
+ struct dsa_port *dp;
int err = 0;
- int port;
if (!ds->ops->port_fdb_add)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_host_address_match(ds, port, info->sw_index,
- info->port)) {
- err = dsa_switch_do_fdb_add(ds, port, info->addr,
- info->vid);
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_host_address_match(dp, info->sw_index,
+ info->port)) {
+ err = dsa_port_do_fdb_add(dp, info->addr, info->vid);
if (err)
break;
}
@@ -364,17 +395,16 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
+ struct dsa_port *dp;
int err = 0;
- int port;
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_host_address_match(ds, port, info->sw_index,
- info->port)) {
- err = dsa_switch_do_fdb_del(ds, port, info->addr,
- info->vid);
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_host_address_match(dp, info->sw_index,
+ info->port)) {
+ err = dsa_port_do_fdb_del(dp, info->addr, info->vid);
if (err)
break;
}
@@ -387,22 +417,24 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
int port = dsa_towards_port(ds, info->sw_index, info->port);
+ struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_fdb_add)
return -EOPNOTSUPP;
- return dsa_switch_do_fdb_add(ds, port, info->addr, info->vid);
+ return dsa_port_do_fdb_add(dp, info->addr, info->vid);
}
static int dsa_switch_fdb_del(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
int port = dsa_towards_port(ds, info->sw_index, info->port);
+ struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- return dsa_switch_do_fdb_del(ds, port, info->addr, info->vid);
+ return dsa_port_do_fdb_del(dp, info->addr, info->vid);
}
static int dsa_switch_hsr_join(struct dsa_switch *ds,
@@ -468,37 +500,39 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
int port = dsa_towards_port(ds, info->sw_index, info->port);
+ struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_mdb_add)
return -EOPNOTSUPP;
- return dsa_switch_do_mdb_add(ds, port, info->mdb);
+ return dsa_port_do_mdb_add(dp, info->mdb);
}
static int dsa_switch_mdb_del(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
int port = dsa_towards_port(ds, info->sw_index, info->port);
+ struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_mdb_del)
return -EOPNOTSUPP;
- return dsa_switch_do_mdb_del(ds, port, info->mdb);
+ return dsa_port_do_mdb_del(dp, info->mdb);
}
static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
+ struct dsa_port *dp;
int err = 0;
- int port;
if (!ds->ops->port_mdb_add)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_host_address_match(ds, port, info->sw_index,
- info->port)) {
- err = dsa_switch_do_mdb_add(ds, port, info->mdb);
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_host_address_match(dp, info->sw_index,
+ info->port)) {
+ err = dsa_port_do_mdb_add(dp, info->mdb);
if (err)
break;
}
@@ -510,16 +544,16 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
+ struct dsa_port *dp;
int err = 0;
- int port;
if (!ds->ops->port_mdb_del)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_host_address_match(ds, port, info->sw_index,
- info->port)) {
- err = dsa_switch_do_mdb_del(ds, port, info->mdb);
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_host_address_match(dp, info->sw_index,
+ info->port)) {
+ err = dsa_port_do_mdb_del(dp, info->mdb);
if (err)
break;
}
@@ -528,13 +562,13 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
return err;
}
-static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_vlan_info *info)
+static bool dsa_port_vlan_match(struct dsa_port *dp,
+ struct dsa_notifier_vlan_info *info)
{
- if (ds->index == info->sw_index && port == info->port)
+ if (dp->ds->index == info->sw_index && dp->index == info->port)
return true;
- if (dsa_is_dsa_port(ds, port))
+ if (dsa_port_is_dsa(dp))
return true;
return false;
@@ -543,14 +577,15 @@ static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
static int dsa_switch_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
- int port, err;
+ struct dsa_port *dp;
+ int err;
if (!ds->ops->port_vlan_add)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_vlan_match(ds, port, info)) {
- err = ds->ops->port_vlan_add(ds, port, info->vlan,
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_vlan_match(dp, info)) {
+ err = ds->ops->port_vlan_add(ds, dp->index, info->vlan,
info->extack);
if (err)
return err;
@@ -579,38 +614,34 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
struct dsa_notifier_tag_proto_info *info)
{
const struct dsa_device_ops *tag_ops = info->tag_ops;
- int port, err;
+ struct dsa_port *dp, *cpu_dp;
+ int err;
if (!ds->ops->change_tag_protocol)
return -EOPNOTSUPP;
ASSERT_RTNL();
- for (port = 0; port < ds->num_ports; port++) {
- if (!dsa_is_cpu_port(ds, port))
- continue;
-
- err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto);
+ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
+ err = ds->ops->change_tag_protocol(ds, cpu_dp->index,
+ tag_ops->proto);
if (err)
return err;
- dsa_port_set_tag_protocol(dsa_to_port(ds, port), tag_ops);
+ dsa_port_set_tag_protocol(cpu_dp, tag_ops);
}
/* Now that changing the tag protocol can no longer fail, let's update
* the remaining bits which are "duplicated for faster access", and the
* bits that depend on the tagger, such as the MTU.
*/
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_is_user_port(ds, port)) {
- struct net_device *slave;
+ dsa_switch_for_each_user_port(dp, ds) {
+ struct net_device *slave = dp->slave;
- slave = dsa_to_port(ds, port)->slave;
- dsa_slave_setup_tagger(slave);
+ dsa_slave_setup_tagger(slave);
- /* rtnl_mutex is held in dsa_tree_change_tag_proto */
- dsa_slave_change_mtu(slave, slave->mtu);
- }
+ /* rtnl_mutex is held in dsa_tree_change_tag_proto */
+ dsa_slave_change_mtu(slave, slave->mtu);
}
return 0;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index f8f7b7c34e7d..72cac2c0af7b 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -6,7 +6,6 @@
* dsa_8021q_netdev_ops is registered for API compliance and not used
* directly by callers.
*/
-#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/dsa/8021q.h>
@@ -78,22 +77,22 @@ EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid);
/* Returns the VID to be inserted into the frame from xmit for switch steering
* instructions on egress. Encodes switch ID and port ID.
*/
-u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port)
+u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp)
{
- return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(ds->index) |
- DSA_8021Q_PORT(port);
+ return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(dp->ds->index) |
+ DSA_8021Q_PORT(dp->index);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_tx_vid);
/* Returns the VID that will be installed as pvid for this switch port, sent as
* tagged egress towards the CPU port and decoded by the rcv function.
*/
-u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
+u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp)
{
- return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) |
- DSA_8021Q_PORT(port);
+ return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(dp->ds->index) |
+ DSA_8021Q_PORT(dp->index);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_rx_vid);
/* Returns the decoded switch ID from the RX VID. */
int dsa_8021q_rx_switch_id(u16 vid)
@@ -139,12 +138,13 @@ dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid)
return NULL;
}
-static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port,
- u16 vid, u16 flags)
+static int dsa_port_do_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid,
+ u16 flags)
{
- struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
- struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx;
+ struct dsa_switch *ds = dp->ds;
struct dsa_tag_8021q_vlan *v;
+ int port = dp->index;
int err;
/* No need to bother with refcounting for user ports */
@@ -175,12 +175,12 @@ static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port,
return 0;
}
-static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port,
- u16 vid)
+static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid)
{
- struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
- struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx;
+ struct dsa_switch *ds = dp->ds;
struct dsa_tag_8021q_vlan *v;
+ int port = dp->index;
int err;
/* No need to bother with refcounting for user ports */
@@ -207,14 +207,16 @@ static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port,
}
static bool
-dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_tag_8021q_vlan_info *info)
+dsa_port_tag_8021q_vlan_match(struct dsa_port *dp,
+ struct dsa_notifier_tag_8021q_vlan_info *info)
{
- if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+ struct dsa_switch *ds = dp->ds;
+
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
return true;
if (ds->dst->index == info->tree_index && ds->index == info->sw_index)
- return port == info->port;
+ return dp->index == info->port;
return false;
}
@@ -222,7 +224,8 @@ dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port,
int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_tag_8021q_vlan_info *info)
{
- int port, err;
+ struct dsa_port *dp;
+ int err;
/* Since we use dsa_broadcast(), there might be other switches in other
* trees which don't support tag_8021q, so don't return an error.
@@ -232,21 +235,20 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx)
return 0;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) {
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_tag_8021q_vlan_match(dp, info)) {
u16 flags = 0;
- if (dsa_is_user_port(ds, port))
+ if (dsa_port_is_user(dp))
flags |= BRIDGE_VLAN_INFO_UNTAGGED;
if (vid_is_dsa_8021q_rxvlan(info->vid) &&
dsa_8021q_rx_switch_id(info->vid) == ds->index &&
- dsa_8021q_rx_source_port(info->vid) == port)
+ dsa_8021q_rx_source_port(info->vid) == dp->index)
flags |= BRIDGE_VLAN_INFO_PVID;
- err = dsa_switch_do_tag_8021q_vlan_add(ds, port,
- info->vid,
- flags);
+ err = dsa_port_do_tag_8021q_vlan_add(dp, info->vid,
+ flags);
if (err)
return err;
}
@@ -258,15 +260,15 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
struct dsa_notifier_tag_8021q_vlan_info *info)
{
- int port, err;
+ struct dsa_port *dp;
+ int err;
if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx)
return 0;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) {
- err = dsa_switch_do_tag_8021q_vlan_del(ds, port,
- info->vid);
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_tag_8021q_vlan_match(dp, info)) {
+ err = dsa_port_do_tag_8021q_vlan_del(dp, info->vid);
if (err)
return err;
}
@@ -322,15 +324,14 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
* +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+
* swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3
*/
-static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_bridge_info *info)
+static bool
+dsa_port_tag_8021q_bridge_match(struct dsa_port *dp,
+ struct dsa_notifier_bridge_info *info)
{
- struct dsa_port *dp = dsa_to_port(ds, port);
-
/* Don't match on self */
- if (ds->dst->index == info->tree_index &&
- ds->index == info->sw_index &&
- port == info->port)
+ if (dp->ds->dst->index == info->tree_index &&
+ dp->ds->index == info->sw_index &&
+ dp->index == info->port)
return false;
if (dsa_port_is_user(dp))
@@ -344,21 +345,21 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
{
struct dsa_switch *targeted_ds;
struct dsa_port *targeted_dp;
+ struct dsa_port *dp;
u16 targeted_rx_vid;
- int err, port;
+ int err;
if (!ds->tag_8021q_ctx)
return 0;
targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
targeted_dp = dsa_to_port(targeted_ds, info->port);
- targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+ targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp);
- for (port = 0; port < ds->num_ports; port++) {
- struct dsa_port *dp = dsa_to_port(ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ dsa_switch_for_each_port(dp, ds) {
+ u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
- if (!dsa_tag_8021q_bridge_match(ds, port, info))
+ if (!dsa_port_tag_8021q_bridge_match(dp, info))
continue;
/* Install the RX VID of the targeted port in our VLAN table */
@@ -380,21 +381,20 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
{
struct dsa_switch *targeted_ds;
struct dsa_port *targeted_dp;
+ struct dsa_port *dp;
u16 targeted_rx_vid;
- int port;
if (!ds->tag_8021q_ctx)
return 0;
targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
targeted_dp = dsa_to_port(targeted_ds, info->port);
- targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+ targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp);
- for (port = 0; port < ds->num_ports; port++) {
- struct dsa_port *dp = dsa_to_port(ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ dsa_switch_for_each_port(dp, ds) {
+ u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
- if (!dsa_tag_8021q_bridge_match(ds, port, info))
+ if (!dsa_port_tag_8021q_bridge_match(dp, info))
continue;
/* Remove the RX VID of the targeted port from our VLAN table */
@@ -433,8 +433,8 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
{
struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
struct dsa_port *dp = dsa_to_port(ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ds, port);
- u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+ u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
+ u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
struct net_device *master;
int err;
@@ -478,8 +478,8 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
{
struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
struct dsa_port *dp = dsa_to_port(ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ds, port);
- u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+ u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
+ u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
struct net_device *master;
/* The CPU port is implicitly configured by
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 77d0ce89ab77..b3da4b2ea11c 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -45,6 +45,7 @@
* 6 6 2 2 4 2 N
*/
+#include <linux/dsa/mv88e6xxx.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -129,12 +130,9 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
u8 tag_dev, tag_port;
enum dsa_cmd cmd;
u8 *dsa_header;
- u16 pvid = 0;
- int err;
if (skb->offload_fwd_mark) {
struct dsa_switch_tree *dst = dp->ds->dst;
- struct net_device *br = dp->bridge_dev;
cmd = DSA_CMD_FORWARD;
@@ -144,19 +142,6 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
*/
tag_dev = dst->last_switch + 1 + dp->bridge_num;
tag_port = 0;
-
- /* If we are offloading forwarding for a VLAN-unaware bridge,
- * inject packets to hardware using the bridge's pvid, since
- * that's where the packets ingressed from.
- */
- if (!br_vlan_enabled(br)) {
- /* Safe because __dev_queue_xmit() runs under
- * rcu_read_lock_bh()
- */
- err = br_vlan_get_pvid_rcu(br, &pvid);
- if (err)
- return NULL;
- }
} else {
cmd = DSA_CMD_FROM_CPU;
tag_dev = dp->ds->index;
@@ -180,16 +165,21 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
dsa_header[2] &= ~0x10;
}
} else {
+ struct net_device *br = dp->bridge_dev;
+ u16 vid;
+
+ vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE;
+
skb_push(skb, DSA_HLEN + extra);
dsa_alloc_etype_header(skb, DSA_HLEN + extra);
- /* Construct untagged DSA tag. */
+ /* Construct DSA header from untagged frame. */
dsa_header = dsa_etype_header_pos_tx(skb) + extra;
dsa_header[0] = (cmd << 6) | tag_dev;
dsa_header[1] = tag_port << 3;
- dsa_header[2] = pvid >> 8;
- dsa_header[3] = pvid & 0xff;
+ dsa_header[2] = vid >> 8;
+ dsa_header[3] = vid & 0xff;
}
return skb;
@@ -210,7 +200,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
cmd = dsa_header[0] >> 6;
switch (cmd) {
case DSA_CMD_FORWARD:
- trunk = !!(dsa_header[1] & 7);
+ trunk = !!(dsa_header[1] & 4);
break;
case DSA_CMD_TO_CPU:
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index fa1d60d13ad9..3509fc967ca9 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -6,7 +6,6 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
-#include <linux/slab.h>
#include <net/dsa.h>
#include "dsa_priv.h"
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index d37ab98e7fe1..de1c849a0a70 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -1,19 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019 NXP
*/
#include <linux/dsa/ocelot.h>
-#include <soc/mscc/ocelot.h>
#include "dsa_priv.h"
+/* If the port is under a VLAN-aware bridge, remove the VLAN header from the
+ * payload and move it into the DSA tag, which will make the switch classify
+ * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero,
+ * which is the pvid of standalone and VLAN-unaware bridge ports.
+ */
+static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp,
+ u64 *vlan_tci, u64 *tag_type)
+{
+ struct net_device *br = READ_ONCE(dp->bridge_dev);
+ struct vlan_ethhdr *hdr;
+ u16 proto, tci;
+
+ if (!br || !br_vlan_enabled(br)) {
+ *vlan_tci = 0;
+ *tag_type = IFH_TAG_TYPE_C;
+ return;
+ }
+
+ hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
+ br_vlan_get_proto(br, &proto);
+
+ if (ntohs(hdr->h_vlan_proto) == proto) {
+ __skb_vlan_pop(skb, &tci);
+ *vlan_tci = tci;
+ } else {
+ rcu_read_lock();
+ br_vlan_get_pvid_rcu(br, &tci);
+ rcu_read_unlock();
+ *vlan_tci = tci;
+ }
+
+ *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C;
+}
+
static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
__be32 ifh_prefix, void **ifh)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
struct dsa_switch *ds = dp->ds;
+ u64 vlan_tci, tag_type;
void *injection;
__be32 *prefix;
u32 rew_op = 0;
+ ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type);
+
injection = skb_push(skb, OCELOT_TAG_LEN);
prefix = skb_push(skb, OCELOT_SHORT_PREFIX_LEN);
@@ -22,6 +58,8 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
ocelot_ifh_set_bypass(injection, 1);
ocelot_ifh_set_src(injection, ds->num_ports);
ocelot_ifh_set_qos_class(injection, skb->priority);
+ ocelot_ifh_set_vlan_tci(injection, vlan_tci);
+ ocelot_ifh_set_tag_type(injection, tag_type);
rew_op = ocelot_ptp_rew_op(skb);
if (rew_op)
@@ -63,6 +101,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
struct dsa_port *dp;
u8 *extraction;
u16 vlan_tpid;
+ u64 rew_val;
/* Revert skb->data by the amount consumed by the DSA master,
* so it points to the beginning of the frame.
@@ -92,6 +131,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
ocelot_xfh_get_qos_class(extraction, &qos_class);
ocelot_xfh_get_tag_type(extraction, &tag_type);
ocelot_xfh_get_vlan_tci(extraction, &vlan_tci);
+ ocelot_xfh_get_rew_val(extraction, &rew_val);
skb->dev = dsa_master_find_slave(netdev, 0, src_port);
if (!skb->dev)
@@ -105,6 +145,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
dsa_default_offload_fwd_mark(skb);
skb->priority = qos_class;
+ OCELOT_SKB_CB(skb)->tstamp_lo = rew_val;
/* Ocelot switches copy frames unmodified to the CPU. However, it is
* possible for the user to request a VLAN modification through
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 3038a257ba05..a1919ea5e828 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020-2021 NXP Semiconductors
+/* Copyright 2020-2021 NXP
*
* An implementation of the software-defined tag_8021q.c tagger format, which
* also preserves full functionality under a vlan_filtering bridge. It does
@@ -9,29 +9,43 @@
* that on egress
*/
#include <linux/dsa/8021q.h>
-#include <soc/mscc/ocelot.h>
-#include <soc/mscc/ocelot_ptp.h>
+#include <linux/dsa/ocelot.h>
#include "dsa_priv.h"
+static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
+ struct sk_buff *skb)
+{
+ struct felix_deferred_xmit_work *xmit_work;
+ struct felix_port *felix_port = dp->priv;
+
+ xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC);
+ if (!xmit_work)
+ return NULL;
+
+ /* Calls felix_port_deferred_xmit in felix.c */
+ kthread_init_work(&xmit_work->work, felix_port->xmit_work_fn);
+ /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ * won't really free the packet.
+ */
+ xmit_work->dp = dp;
+ xmit_work->skb = skb_get(skb);
+
+ kthread_queue_work(felix_port->xmit_worker, &xmit_work->work);
+
+ return NULL;
+}
+
static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
- u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- struct ocelot *ocelot = dp->ds->priv;
- int port = dp->index;
- u32 rew_op = 0;
+ u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ struct ethhdr *hdr = eth_hdr(skb);
- rew_op = ocelot_ptp_rew_op(skb);
- if (rew_op) {
- if (!ocelot_can_inject(ocelot, 0))
- return NULL;
-
- ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
- return NULL;
- }
+ if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest))
+ return ocelot_defer_xmit(dp, skb);
return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
((pcp << VLAN_PRIO_SHIFT) | tx_vid));
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index f920487ae145..6d928ee3ef7a 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -54,7 +54,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
p = (__be16 *)tag;
*p = htons(RTL4_A_ETHERTYPE);
- out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT) | (2 << 8);
+ out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT);
/* The lower bits indicate the port number */
out |= BIT(dp->index);
diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
new file mode 100644
index 000000000000..02686ad4045d
--- /dev/null
+++ b/net/dsa/tag_rtl8_4.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handler for Realtek 8 byte switch tags
+ *
+ * Copyright (C) 2021 Alvin Å ipraga <alsi@bang-olufsen.dk>
+ *
+ * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence
+ * named tag_rtl8_4.
+ *
+ * This tag header has the following format:
+ *
+ * -------------------------------------------
+ * | MAC DA | MAC SA | 8 byte tag | Type | ...
+ * -------------------------------------------
+ * _______________/ \______________________________________
+ * / \
+ * 0 7|8 15
+ * |-----------------------------------+-----------------------------------|---
+ * | (16-bit) | ^
+ * | Realtek EtherType [0x8899] | |
+ * |-----------------------------------+-----------------------------------| 8
+ * | (8-bit) | (8-bit) |
+ * | Protocol [0x04] | REASON | b
+ * |-----------------------------------+-----------------------------------| y
+ * | (1) | (1) | (2) | (1) | (3) | (1) | (1) | (1) | (5) | t
+ * | FID_EN | X | FID | PRI_EN | PRI | KEEP | X | LEARN_DIS | X | e
+ * |-----------------------------------+-----------------------------------| s
+ * | (1) | (15-bit) | |
+ * | ALLOW | TX/RX | v
+ * |-----------------------------------+-----------------------------------|---
+ *
+ * With the following field descriptions:
+ *
+ * field | description
+ * ------------+-------------
+ * Realtek | 0x8899: indicates that this is a proprietary Realtek tag;
+ * EtherType | note that Realtek uses the same EtherType for
+ * | other incompatible tag formats (e.g. tag_rtl4_a.c)
+ * Protocol | 0x04: indicates that this tag conforms to this format
+ * X | reserved
+ * ------------+-------------
+ * REASON | reason for forwarding packet to CPU
+ * | 0: packet was forwarded or flooded to CPU
+ * | 80: packet was trapped to CPU
+ * FID_EN | 1: packet has an FID
+ * | 0: no FID
+ * FID | FID of packet (if FID_EN=1)
+ * PRI_EN | 1: force priority of packet
+ * | 0: don't force priority
+ * PRI | priority of packet (if PRI_EN=1)
+ * KEEP | preserve packet VLAN tag format
+ * LEARN_DIS | don't learn the source MAC address of the packet
+ * ALLOW | 1: treat TX/RX field as an allowance port mask, meaning the
+ * | packet may only be forwarded to ports specified in the
+ * | mask
+ * | 0: no allowance port mask, TX/RX field is the forwarding
+ * | port mask
+ * TX/RX | TX (switch->CPU): port number the packet was received on
+ * | RX (CPU->switch): forwarding port mask (if ALLOW=0)
+ * | allowance port mask (if ALLOW=1)
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/etherdevice.h>
+
+#include "dsa_priv.h"
+
+/* Protocols supported:
+ *
+ * 0x04 = RTL8365MB DSA protocol
+ */
+
+#define RTL8_4_TAG_LEN 8
+
+#define RTL8_4_PROTOCOL GENMASK(15, 8)
+#define RTL8_4_PROTOCOL_RTL8365MB 0x04
+#define RTL8_4_REASON GENMASK(7, 0)
+#define RTL8_4_REASON_FORWARD 0
+#define RTL8_4_REASON_TRAP 80
+
+#define RTL8_4_LEARN_DIS BIT(5)
+
+#define RTL8_4_TX GENMASK(3, 0)
+#define RTL8_4_RX GENMASK(10, 0)
+
+static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ __be16 *tag;
+
+ skb_push(skb, RTL8_4_TAG_LEN);
+
+ dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN);
+ tag = dsa_etype_header_pos_tx(skb);
+
+ /* Set Realtek EtherType */
+ tag[0] = htons(ETH_P_REALTEK);
+
+ /* Set Protocol; zero REASON */
+ tag[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB));
+
+ /* Zero FID_EN, FID, PRI_EN, PRI, KEEP; set LEARN_DIS */
+ tag[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1));
+
+ /* Zero ALLOW; set RX (CPU->switch) forwarding port mask */
+ tag[3] = htons(FIELD_PREP(RTL8_4_RX, BIT(dp->index)));
+
+ return skb;
+}
+
+static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ __be16 *tag;
+ u16 etype;
+ u8 reason;
+ u8 proto;
+ u8 port;
+
+ if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
+ return NULL;
+
+ tag = dsa_etype_header_pos_rx(skb);
+
+ /* Parse Realtek EtherType */
+ etype = ntohs(tag[0]);
+ if (unlikely(etype != ETH_P_REALTEK)) {
+ dev_warn_ratelimited(&dev->dev,
+ "non-realtek ethertype 0x%04x\n", etype);
+ return NULL;
+ }
+
+ /* Parse Protocol */
+ proto = FIELD_GET(RTL8_4_PROTOCOL, ntohs(tag[1]));
+ if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) {
+ dev_warn_ratelimited(&dev->dev,
+ "unknown realtek protocol 0x%02x\n",
+ proto);
+ return NULL;
+ }
+
+ /* Parse REASON */
+ reason = FIELD_GET(RTL8_4_REASON, ntohs(tag[1]));
+
+ /* Parse TX (switch->CPU) */
+ port = FIELD_GET(RTL8_4_TX, ntohs(tag[3]));
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev) {
+ dev_warn_ratelimited(&dev->dev,
+ "could not find slave for port %d\n",
+ port);
+ return NULL;
+ }
+
+ /* Remove tag and recalculate checksum */
+ skb_pull_rcsum(skb, RTL8_4_TAG_LEN);
+
+ dsa_strip_etype_header(skb, RTL8_4_TAG_LEN);
+
+ if (reason != RTL8_4_REASON_TRAP)
+ dsa_default_offload_fwd_mark(skb);
+
+ return skb;
+}
+
+static const struct dsa_device_ops rtl8_4_netdev_ops = {
+ .name = "rtl8_4",
+ .proto = DSA_TAG_PROTO_RTL8_4,
+ .xmit = rtl8_4_tag_xmit,
+ .rcv = rtl8_4_tag_rcv,
+ .needed_headroom = RTL8_4_TAG_LEN,
+};
+module_dsa_tag_driver(rtl8_4_netdev_ops);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index c054f48541c8..262c8833a910 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -4,6 +4,7 @@
#include <linux/if_vlan.h>
#include <linux/dsa/sja1105.h>
#include <linux/dsa/8021q.h>
+#include <linux/skbuff.h>
#include <linux/packing.h>
#include "dsa_priv.h"
@@ -53,6 +54,11 @@
#define SJA1110_TX_TRAILER_LEN 4
#define SJA1110_MAX_PADDING_LEN 15
+enum sja1110_meta_tstamp {
+ SJA1110_META_TSTAMP_TX = 0,
+ SJA1110_META_TSTAMP_RX = 1,
+};
+
/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
static inline bool sja1105_is_link_local(const struct sk_buff *skb)
{
@@ -152,10 +158,7 @@ static u16 sja1105_xmit_tpid(struct dsa_port *dp)
* we're sure about that). It may not be on this port though, so we
* need to find it.
*/
- list_for_each_entry(other_dp, &ds->dst->ports, list) {
- if (other_dp->ds != ds)
- continue;
-
+ dsa_switch_for_each_port(other_dp, ds) {
if (!other_dp->bridge_dev)
continue;
@@ -232,9 +235,9 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
- u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+ u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
if (skb->offload_fwd_mark)
return sja1105_imprecise_xmit(skb, netdev);
@@ -260,9 +263,9 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
{
struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
struct dsa_port *dp = dsa_slave_to_port(netdev);
- u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+ u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
__be32 *tx_trailer;
__be16 *tx_header;
int trailer_pos;
@@ -520,6 +523,43 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
is_meta);
}
+static void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
+ u8 ts_id, enum sja1110_meta_tstamp dir,
+ u64 tstamp)
+{
+ struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct skb_shared_hwtstamps shwt = {0};
+ struct sja1105_port *sp = dp->priv;
+
+ if (!dsa_port_is_sja1105(dp))
+ return;
+
+ /* We don't care about RX timestamps on the CPU port */
+ if (dir == SJA1110_META_TSTAMP_RX)
+ return;
+
+ spin_lock(&sp->data->skb_txtstamp_queue.lock);
+
+ skb_queue_walk_safe(&sp->data->skb_txtstamp_queue, skb, skb_tmp) {
+ if (SJA1105_SKB_CB(skb)->ts_id != ts_id)
+ continue;
+
+ __skb_unlink(skb, &sp->data->skb_txtstamp_queue);
+ skb_match = skb;
+
+ break;
+ }
+
+ spin_unlock(&sp->data->skb_txtstamp_queue.lock);
+
+ if (WARN_ON(!skb_match))
+ return;
+
+ shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp));
+ skb_complete_tx_timestamp(skb_match, &shwt);
+}
+
static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
{
u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 73fce9467467..c7d9e08107cb 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -51,6 +51,7 @@
#include <linux/if_ether.h>
#include <linux/of_net.h>
#include <linux/pci.h>
+#include <linux/property.h>
#include <net/dst.h>
#include <net/arp.h>
#include <net/sock.h>
@@ -304,7 +305,7 @@ void eth_commit_mac_addr_change(struct net_device *dev, void *p)
{
struct sockaddr *addr = p;
- memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+ eth_hw_addr_set(dev, addr->sa_data);
}
EXPORT_SYMBOL(eth_commit_mac_addr_change);
@@ -523,6 +524,26 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr)
EXPORT_SYMBOL(eth_platform_get_mac_address);
/**
+ * platform_get_ethdev_address - Set netdev's MAC address from a given device
+ * @dev: Pointer to the device
+ * @netdev: Pointer to netdev to write the address to
+ *
+ * Wrapper around eth_platform_get_mac_address() which writes the address
+ * directly to netdev->dev_addr.
+ */
+int platform_get_ethdev_address(struct device *dev, struct net_device *netdev)
+{
+ u8 addr[ETH_ALEN] __aligned(2);
+ int ret;
+
+ ret = eth_platform_get_mac_address(dev, addr);
+ if (!ret)
+ eth_hw_addr_set(netdev, addr);
+ return ret;
+}
+EXPORT_SYMBOL(platform_get_ethdev_address);
+
+/**
* nvmem_get_mac_address - Obtain the MAC address from an nvmem cell named
* 'mac-address' associated with given device.
*
@@ -557,4 +578,81 @@ int nvmem_get_mac_address(struct device *dev, void *addrbuf)
return 0;
}
-EXPORT_SYMBOL(nvmem_get_mac_address);
+
+static int fwnode_get_mac_addr(struct fwnode_handle *fwnode,
+ const char *name, char *addr)
+{
+ int ret;
+
+ ret = fwnode_property_read_u8_array(fwnode, name, addr, ETH_ALEN);
+ if (ret)
+ return ret;
+
+ if (!is_valid_ether_addr(addr))
+ return -EINVAL;
+ return 0;
+}
+
+/**
+ * fwnode_get_mac_address - Get the MAC from the firmware node
+ * @fwnode: Pointer to the firmware node
+ * @addr: Address of buffer to store the MAC in
+ *
+ * Search the firmware node for the best MAC address to use. 'mac-address' is
+ * checked first, because that is supposed to contain to "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address. If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the firmware tables, but were not updated by the firmware. For
+ * example, the DTS could define 'mac-address' and 'local-mac-address', with
+ * zero MAC addresses. Some older U-Boots only initialized 'local-mac-address'.
+ * In this case, the real MAC is in 'local-mac-address', and 'mac-address'
+ * exists but is all zeros.
+ */
+int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr)
+{
+ if (!fwnode_get_mac_addr(fwnode, "mac-address", addr) ||
+ !fwnode_get_mac_addr(fwnode, "local-mac-address", addr) ||
+ !fwnode_get_mac_addr(fwnode, "address", addr))
+ return 0;
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(fwnode_get_mac_address);
+
+/**
+ * device_get_mac_address - Get the MAC for a given device
+ * @dev: Pointer to the device
+ * @addr: Address of buffer to store the MAC in
+ */
+int device_get_mac_address(struct device *dev, char *addr)
+{
+ return fwnode_get_mac_address(dev_fwnode(dev), addr);
+}
+EXPORT_SYMBOL(device_get_mac_address);
+
+/**
+ * device_get_ethdev_address - Set netdev's MAC address from a given device
+ * @dev: Pointer to the device
+ * @netdev: Pointer to netdev to write the address to
+ *
+ * Wrapper around device_get_mac_address() which writes the address
+ * directly to netdev->dev_addr.
+ */
+int device_get_ethdev_address(struct device *dev, struct net_device *netdev)
+{
+ u8 addr[ETH_ALEN];
+ int ret;
+
+ ret = device_get_mac_address(dev, addr);
+ if (!ret)
+ eth_hw_addr_set(netdev, addr);
+ return ret;
+}
+EXPORT_SYMBOL(device_get_ethdev_address);
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 0a19470efbfb..b76432e70e6b 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
- tunnels.o fec.o eeprom.o stats.o phc_vclocks.o
+ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index f2abc3152888..65e9bc1058b5 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -32,6 +32,29 @@
#include <generated/utsrelease.h>
#include "common.h"
+/* State held across locks and calls for commands which have devlink fallback */
+struct ethtool_devlink_compat {
+ struct devlink *devlink;
+ union {
+ struct ethtool_flash efl;
+ struct ethtool_drvinfo info;
+ };
+};
+
+static struct devlink *netdev_to_devlink_get(struct net_device *dev)
+{
+ struct devlink_port *devlink_port;
+
+ if (!dev->netdev_ops->ndo_get_devlink_port)
+ return NULL;
+
+ devlink_port = dev->netdev_ops->ndo_get_devlink_port(dev);
+ if (!devlink_port)
+ return NULL;
+
+ return devlink_try_get(devlink_port->devlink);
+}
+
/*
* Some useful ethtool_ops methods that're device independent.
* If we find that all drivers want to do the same thing here,
@@ -89,7 +112,8 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
return -EFAULT;
useraddr += sizeof(cmd);
- if (copy_to_user(useraddr, features, copy_size * sizeof(*features)))
+ if (copy_to_user(useraddr, features,
+ array_size(copy_size, sizeof(*features))))
return -EFAULT;
return 0;
@@ -335,7 +359,7 @@ EXPORT_SYMBOL(ethtool_intersect_link_masks);
void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
u32 legacy_u32)
{
- bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ linkmode_zero(dst);
dst[0] = legacy_u32;
}
EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode);
@@ -350,11 +374,10 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) {
__ETHTOOL_DECLARE_LINK_MODE_MASK(ext);
- bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ linkmode_zero(ext);
bitmap_fill(ext, 32);
bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
- if (bitmap_intersects(ext, src,
- __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ if (linkmode_intersects(ext, src)) {
/* src mask goes beyond bit 31 */
retval = false;
}
@@ -697,22 +720,20 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
return ret;
}
-static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
- void __user *useraddr)
+static int
+ethtool_get_drvinfo(struct net_device *dev, struct ethtool_devlink_compat *rsp)
{
- struct ethtool_drvinfo info;
const struct ethtool_ops *ops = dev->ethtool_ops;
- memset(&info, 0, sizeof(info));
- info.cmd = ETHTOOL_GDRVINFO;
- strlcpy(info.version, UTS_RELEASE, sizeof(info.version));
+ rsp->info.cmd = ETHTOOL_GDRVINFO;
+ strlcpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version));
if (ops->get_drvinfo) {
- ops->get_drvinfo(dev, &info);
+ ops->get_drvinfo(dev, &rsp->info);
} else if (dev->dev.parent && dev->dev.parent->driver) {
- strlcpy(info.bus_info, dev_name(dev->dev.parent),
- sizeof(info.bus_info));
- strlcpy(info.driver, dev->dev.parent->driver->name,
- sizeof(info.driver));
+ strlcpy(rsp->info.bus_info, dev_name(dev->dev.parent),
+ sizeof(rsp->info.bus_info));
+ strlcpy(rsp->info.driver, dev->dev.parent->driver->name,
+ sizeof(rsp->info.driver));
} else {
return -EOPNOTSUPP;
}
@@ -726,30 +747,27 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
rc = ops->get_sset_count(dev, ETH_SS_TEST);
if (rc >= 0)
- info.testinfo_len = rc;
+ rsp->info.testinfo_len = rc;
rc = ops->get_sset_count(dev, ETH_SS_STATS);
if (rc >= 0)
- info.n_stats = rc;
+ rsp->info.n_stats = rc;
rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
if (rc >= 0)
- info.n_priv_flags = rc;
+ rsp->info.n_priv_flags = rc;
}
if (ops->get_regs_len) {
int ret = ops->get_regs_len(dev);
if (ret > 0)
- info.regdump_len = ret;
+ rsp->info.regdump_len = ret;
}
if (ops->get_eeprom_len)
- info.eedump_len = ops->get_eeprom_len(dev);
+ rsp->info.eedump_len = ops->get_eeprom_len(dev);
- if (!info.fw_version[0])
- devlink_compat_running_version(dev, info.fw_version,
- sizeof(info.fw_version));
+ if (!rsp->info.fw_version[0])
+ rsp->devlink = netdev_to_devlink_get(dev);
- if (copy_to_user(useraddr, &info, sizeof(info)))
- return -EFAULT;
return 0;
}
@@ -799,7 +817,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
goto out;
useraddr += offsetof(struct ethtool_sset_info, data);
- if (copy_to_user(useraddr, info_buf, idx * sizeof(u32)))
+ if (copy_to_user(useraddr, info_buf, array_size(idx, sizeof(u32))))
goto out;
ret = 0;
@@ -1022,7 +1040,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
{
int i;
- if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
+ if (copy_from_user(indir, useraddr, array_size(size, sizeof(indir[0]))))
return -EFAULT;
/* Validate ring indices */
@@ -1537,6 +1555,10 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
ret = getter(dev, &eeprom, data);
if (ret)
break;
+ if (!eeprom.len) {
+ ret = -EIO;
+ break;
+ }
if (copy_to_user(userbuf, data, eeprom.len)) {
ret = -EFAULT;
break;
@@ -1891,7 +1913,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
if (copy_to_user(useraddr, &test, sizeof(test)))
goto out;
useraddr += sizeof(test);
- if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
+ if (copy_to_user(useraddr, data, array_size(test.len, sizeof(u64))))
goto out;
ret = 0;
@@ -1933,7 +1955,8 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
goto out;
useraddr += sizeof(gstrings);
if (gstrings.len &&
- copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+ copy_to_user(useraddr, data,
+ array_size(gstrings.len, ETH_GSTRING_LEN)))
goto out;
ret = 0;
@@ -2173,19 +2196,15 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
return actor(dev, edata.data);
}
-static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
- char __user *useraddr)
+static int
+ethtool_flash_device(struct net_device *dev, struct ethtool_devlink_compat *req)
{
- struct ethtool_flash efl;
-
- if (copy_from_user(&efl, useraddr, sizeof(efl)))
- return -EFAULT;
- efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
-
- if (!dev->ethtool_ops->flash_device)
- return devlink_compat_flash_update(dev, efl.data);
+ if (!dev->ethtool_ops->flash_device) {
+ req->devlink = netdev_to_devlink_get(dev);
+ return 0;
+ }
- return dev->ethtool_ops->flash_device(dev, &efl);
+ return dev->ethtool_ops->flash_device(dev, &req->efl);
}
static int ethtool_set_dump(struct net_device *dev,
@@ -2695,19 +2714,19 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
-int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
+static int
+__dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr,
+ u32 ethcmd, struct ethtool_devlink_compat *devlink_state)
{
- struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
- u32 ethcmd, sub_cmd;
+ struct net_device *dev;
+ u32 sub_cmd;
int rc;
netdev_features_t old_features;
+ dev = __dev_get_by_name(net, ifr->ifr_name);
if (!dev)
return -ENODEV;
- if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
- return -EFAULT;
-
if (ethcmd == ETHTOOL_PERQUEUE) {
if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)))
return -EFAULT;
@@ -2781,7 +2800,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
rc = ethtool_set_settings(dev, useraddr);
break;
case ETHTOOL_GDRVINFO:
- rc = ethtool_get_drvinfo(dev, useraddr);
+ rc = ethtool_get_drvinfo(dev, devlink_state);
break;
case ETHTOOL_GREGS:
rc = ethtool_get_regs(dev, useraddr);
@@ -2883,7 +2902,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
break;
case ETHTOOL_FLASHDEV:
- rc = ethtool_flash_device(dev, useraddr);
+ rc = ethtool_flash_device(dev, devlink_state);
break;
case ETHTOOL_RESET:
rc = ethtool_reset(dev, useraddr);
@@ -2995,6 +3014,60 @@ out:
return rc;
}
+int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
+{
+ struct ethtool_devlink_compat *state;
+ u32 ethcmd;
+ int rc;
+
+ if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
+ return -EFAULT;
+
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ switch (ethcmd) {
+ case ETHTOOL_FLASHDEV:
+ if (copy_from_user(&state->efl, useraddr, sizeof(state->efl))) {
+ rc = -EFAULT;
+ goto exit_free;
+ }
+ state->efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
+ break;
+ }
+
+ rtnl_lock();
+ rc = __dev_ethtool(net, ifr, useraddr, ethcmd, state);
+ rtnl_unlock();
+ if (rc)
+ goto exit_free;
+
+ switch (ethcmd) {
+ case ETHTOOL_FLASHDEV:
+ if (state->devlink)
+ rc = devlink_compat_flash_update(state->devlink,
+ state->efl.data);
+ break;
+ case ETHTOOL_GDRVINFO:
+ if (state->devlink)
+ devlink_compat_running_version(state->devlink,
+ state->info.fw_version,
+ sizeof(state->info.fw_version));
+ if (copy_to_user(useraddr, &state->info, sizeof(state->info))) {
+ rc = -EFAULT;
+ goto exit_free;
+ }
+ break;
+ }
+
+exit_free:
+ if (state->devlink)
+ devlink_put(state->devlink);
+ kfree(state);
+ return rc;
+}
+
struct ethtool_rx_flow_key {
struct flow_dissector_key_basic basic;
union {
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
new file mode 100644
index 000000000000..bc2cef11bbda
--- /dev/null
+++ b/net/ethtool/module.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+struct module_req_info {
+ struct ethnl_req_info base;
+};
+
+struct module_reply_data {
+ struct ethnl_reply_data base;
+ struct ethtool_module_power_mode_params power;
+};
+
+#define MODULE_REPDATA(__reply_base) \
+ container_of(__reply_base, struct module_reply_data, base)
+
+/* MODULE_GET */
+
+const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1] = {
+ [ETHTOOL_A_MODULE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int module_get_power_mode(struct net_device *dev,
+ struct module_reply_data *data,
+ struct netlink_ext_ack *extack)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (!ops->get_module_power_mode)
+ return 0;
+
+ return ops->get_module_power_mode(dev, &data->power, extack);
+}
+
+static int module_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct module_reply_data *data = MODULE_REPDATA(reply_base);
+ struct netlink_ext_ack *extack = info ? info->extack : NULL;
+ struct net_device *dev = reply_base->dev;
+ int ret;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = module_get_power_mode(dev, data, extack);
+ if (ret < 0)
+ goto out_complete;
+
+out_complete:
+ ethnl_ops_complete(dev);
+ return ret;
+}
+
+static int module_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ struct module_reply_data *data = MODULE_REPDATA(reply_base);
+ int len = 0;
+
+ if (data->power.policy)
+ len += nla_total_size(sizeof(u8)); /* _MODULE_POWER_MODE_POLICY */
+
+ if (data->power.mode)
+ len += nla_total_size(sizeof(u8)); /* _MODULE_POWER_MODE */
+
+ return len;
+}
+
+static int module_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct module_reply_data *data = MODULE_REPDATA(reply_base);
+
+ if (data->power.policy &&
+ nla_put_u8(skb, ETHTOOL_A_MODULE_POWER_MODE_POLICY,
+ data->power.policy))
+ return -EMSGSIZE;
+
+ if (data->power.mode &&
+ nla_put_u8(skb, ETHTOOL_A_MODULE_POWER_MODE, data->power.mode))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+const struct ethnl_request_ops ethnl_module_request_ops = {
+ .request_cmd = ETHTOOL_MSG_MODULE_GET,
+ .reply_cmd = ETHTOOL_MSG_MODULE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_MODULE_HEADER,
+ .req_info_size = sizeof(struct module_req_info),
+ .reply_data_size = sizeof(struct module_reply_data),
+
+ .prepare_data = module_prepare_data,
+ .reply_size = module_reply_size,
+ .fill_reply = module_fill_reply,
+};
+
+/* MODULE_SET */
+
+const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1] = {
+ [ETHTOOL_A_MODULE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_MODULE_POWER_MODE_POLICY] =
+ NLA_POLICY_RANGE(NLA_U8, ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH,
+ ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO),
+};
+
+static int module_set_power_mode(struct net_device *dev, struct nlattr **tb,
+ bool *p_mod, struct netlink_ext_ack *extack)
+{
+ struct ethtool_module_power_mode_params power = {};
+ struct ethtool_module_power_mode_params power_new;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ int ret;
+
+ if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY])
+ return 0;
+
+ if (!ops->get_module_power_mode || !ops->set_module_power_mode) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY],
+ "Setting power mode policy is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]);
+ ret = ops->get_module_power_mode(dev, &power, extack);
+ if (ret < 0)
+ return ret;
+
+ if (power_new.policy == power.policy)
+ return 0;
+ *p_mod = true;
+
+ return ops->set_module_power_mode(dev, &power_new, extack);
+}
+
+int ethnl_set_module(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethnl_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ struct net_device *dev;
+ bool mod = false;
+ int ret;
+
+ ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_MODULE_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+ dev = req_info.dev;
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = module_set_power_mode(dev, tb, &mod, info->extack);
+ if (ret < 0)
+ goto out_ops;
+
+ if (!mod)
+ goto out_ops;
+
+ ethtool_notify(dev, ETHTOOL_MSG_MODULE_NTF, NULL);
+
+out_ops:
+ ethnl_ops_complete(dev);
+out_rtnl:
+ rtnl_unlock();
+ dev_put(dev);
+ return ret;
+}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 1797a0a90019..38b44c0291b1 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -282,6 +282,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_MODULE_EEPROM_GET] = &ethnl_module_eeprom_request_ops,
[ETHTOOL_MSG_STATS_GET] = &ethnl_stats_request_ops,
[ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops,
+ [ETHTOOL_MSG_MODULE_GET] = &ethnl_module_request_ops,
};
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -593,6 +594,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
[ETHTOOL_MSG_PAUSE_NTF] = &ethnl_pause_request_ops,
[ETHTOOL_MSG_EEE_NTF] = &ethnl_eee_request_ops,
[ETHTOOL_MSG_FEC_NTF] = &ethnl_fec_request_ops,
+ [ETHTOOL_MSG_MODULE_NTF] = &ethnl_module_request_ops,
};
/* default notification handler */
@@ -686,6 +688,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
[ETHTOOL_MSG_PAUSE_NTF] = ethnl_default_notify,
[ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify,
[ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify,
};
void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
@@ -999,6 +1002,22 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_phc_vclocks_get_policy,
.maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_MODULE_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_module_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_module_get_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_MODULE_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_set_module,
+ .policy = ethnl_module_set_policy,
+ .maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index e8987e28036f..836ee7157848 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -337,6 +337,7 @@ extern const struct ethnl_request_ops ethnl_fec_request_ops;
extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
extern const struct ethnl_request_ops ethnl_stats_request_ops;
extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
+extern const struct ethnl_request_ops ethnl_module_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -373,6 +374,8 @@ extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1];
extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1];
+extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1];
+extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
@@ -391,6 +394,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
+int ethnl_set_module(struct sk_buff *skb, struct genl_info *info);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index 9009f412151e..ee1e5806bc93 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -56,8 +56,7 @@ static int pause_reply_size(const struct ethnl_req_info *req_base,
if (req_base->flags & ETHTOOL_FLAG_STATS)
n += nla_total_size(0) + /* _PAUSE_STATS */
- nla_total_size_64bit(sizeof(u64)) *
- (ETHTOOL_A_PAUSE_STAT_MAX - 2);
+ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_PAUSE_STAT_CNT;
return n;
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 26c32407f029..737e4f17e1c6 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -309,9 +309,9 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
}
spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
- hsr_stag->HSR_TLV_type = type;
+ hsr_stag->tlv.HSR_TLV_type = type;
/* TODO: Why 12 in HSRv0? */
- hsr_stag->HSR_TLV_length = hsr->prot_version ?
+ hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ?
sizeof(struct hsr_sup_payload) : 12;
/* Payload: MacAddressA */
@@ -350,8 +350,8 @@ static void send_prp_supervision_frame(struct hsr_port *master,
spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
hsr->sup_sequence_nr++;
- hsr_stag->HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
- hsr_stag->HSR_TLV_length = sizeof(struct hsr_sup_payload);
+ hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
+ hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload);
/* Payload: MacAddressA */
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
@@ -493,7 +493,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
INIT_LIST_HEAD(&hsr->self_node_db);
spin_lock_init(&hsr->list_lock);
- ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr);
+ eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
/* initialize protocol specific functions */
if (protocol_version == PRP_V1) {
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index ceb8afb2a62f..e59cbb4f0cd1 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -37,6 +37,8 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
struct ethhdr *eth_hdr;
struct hsr_sup_tag *hsr_sup_tag;
struct hsrv1_ethhdr_sp *hsr_V1_hdr;
+ struct hsr_sup_tlv *hsr_sup_tlv;
+ u16 total_length = 0;
WARN_ON_ONCE(!skb_mac_header_was_set(skb));
eth_hdr = (struct ethhdr *)skb_mac_header(skb);
@@ -53,23 +55,63 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
/* Get the supervision header from correct location. */
if (eth_hdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */
+ total_length = sizeof(struct hsrv1_ethhdr_sp);
+ if (!pskb_may_pull(skb, total_length))
+ return false;
+
hsr_V1_hdr = (struct hsrv1_ethhdr_sp *)skb_mac_header(skb);
if (hsr_V1_hdr->hsr.encap_proto != htons(ETH_P_PRP))
return false;
hsr_sup_tag = &hsr_V1_hdr->hsr_sup;
} else {
+ total_length = sizeof(struct hsrv0_ethhdr_sp);
+ if (!pskb_may_pull(skb, total_length))
+ return false;
+
hsr_sup_tag =
&((struct hsrv0_ethhdr_sp *)skb_mac_header(skb))->hsr_sup;
}
- if (hsr_sup_tag->HSR_TLV_type != HSR_TLV_ANNOUNCE &&
- hsr_sup_tag->HSR_TLV_type != HSR_TLV_LIFE_CHECK &&
- hsr_sup_tag->HSR_TLV_type != PRP_TLV_LIFE_CHECK_DD &&
- hsr_sup_tag->HSR_TLV_type != PRP_TLV_LIFE_CHECK_DA)
+ if (hsr_sup_tag->tlv.HSR_TLV_type != HSR_TLV_ANNOUNCE &&
+ hsr_sup_tag->tlv.HSR_TLV_type != HSR_TLV_LIFE_CHECK &&
+ hsr_sup_tag->tlv.HSR_TLV_type != PRP_TLV_LIFE_CHECK_DD &&
+ hsr_sup_tag->tlv.HSR_TLV_type != PRP_TLV_LIFE_CHECK_DA)
return false;
- if (hsr_sup_tag->HSR_TLV_length != 12 &&
- hsr_sup_tag->HSR_TLV_length != sizeof(struct hsr_sup_payload))
+ if (hsr_sup_tag->tlv.HSR_TLV_length != 12 &&
+ hsr_sup_tag->tlv.HSR_TLV_length != sizeof(struct hsr_sup_payload))
+ return false;
+
+ /* Get next tlv */
+ total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
+ if (!pskb_may_pull(skb, total_length))
+ return false;
+ skb_pull(skb, total_length);
+ hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data;
+ skb_push(skb, total_length);
+
+ /* if this is a redbox supervision frame we need to verify
+ * that more data is available
+ */
+ if (hsr_sup_tlv->HSR_TLV_type == PRP_TLV_REDBOX_MAC) {
+ /* tlv length must be a length of a mac address */
+ if (hsr_sup_tlv->HSR_TLV_length != sizeof(struct hsr_sup_payload))
+ return false;
+
+ /* make sure another tlv follows */
+ total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tlv->HSR_TLV_length;
+ if (!pskb_may_pull(skb, total_length))
+ return false;
+
+ /* get next tlv */
+ skb_pull(skb, total_length);
+ hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data;
+ skb_push(skb, total_length);
+ }
+
+ /* end of tlvs must follow at the end */
+ if (hsr_sup_tlv->HSR_TLV_type == HSR_TLV_EOT &&
+ hsr_sup_tlv->HSR_TLV_length != 0)
return false;
return true;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index e31949479305..0775f0f95dbf 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -76,8 +76,8 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
* frames from self that's been looped over the HSR ring.
*/
int hsr_create_self_node(struct hsr_priv *hsr,
- unsigned char addr_a[ETH_ALEN],
- unsigned char addr_b[ETH_ALEN])
+ const unsigned char addr_a[ETH_ALEN],
+ const unsigned char addr_b[ETH_ALEN])
{
struct list_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node, *oldnode;
@@ -265,11 +265,14 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
struct hsr_port *port_rcv = frame->port_rcv;
struct hsr_priv *hsr = port_rcv->hsr;
struct hsr_sup_payload *hsr_sp;
+ struct hsr_sup_tlv *hsr_sup_tlv;
struct hsr_node *node_real;
struct sk_buff *skb = NULL;
struct list_head *node_db;
struct ethhdr *ethhdr;
int i;
+ unsigned int pull_size = 0;
+ unsigned int total_pull_size = 0;
/* Here either frame->skb_hsr or frame->skb_prp should be
* valid as supervision frame always will have protocol
@@ -284,18 +287,26 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
if (!skb)
return;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
-
/* Leave the ethernet header. */
- skb_pull(skb, sizeof(struct ethhdr));
+ pull_size = sizeof(struct ethhdr);
+ skb_pull(skb, pull_size);
+ total_pull_size += pull_size;
+
+ ethhdr = (struct ethhdr *)skb_mac_header(skb);
/* And leave the HSR tag. */
- if (ethhdr->h_proto == htons(ETH_P_HSR))
- skb_pull(skb, sizeof(struct hsr_tag));
+ if (ethhdr->h_proto == htons(ETH_P_HSR)) {
+ pull_size = sizeof(struct ethhdr);
+ skb_pull(skb, pull_size);
+ total_pull_size += pull_size;
+ }
/* And leave the HSR sup tag. */
- skb_pull(skb, sizeof(struct hsr_sup_tag));
+ pull_size = sizeof(struct hsr_tag);
+ skb_pull(skb, pull_size);
+ total_pull_size += pull_size;
+ /* get HSR sup payload */
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Merge node_curr (registered on macaddress_B) into node_real */
@@ -312,6 +323,37 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
/* Node has already been merged */
goto done;
+ /* Leave the first HSR sup payload. */
+ pull_size = sizeof(struct hsr_sup_payload);
+ skb_pull(skb, pull_size);
+ total_pull_size += pull_size;
+
+ /* Get second supervision tlv */
+ hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data;
+ /* And check if it is a redbox mac TLV */
+ if (hsr_sup_tlv->HSR_TLV_type == PRP_TLV_REDBOX_MAC) {
+ /* We could stop here after pushing hsr_sup_payload,
+ * or proceed and allow macaddress_B and for redboxes.
+ */
+ /* Sanity check length */
+ if (hsr_sup_tlv->HSR_TLV_length != 6)
+ goto done;
+
+ /* Leave the second HSR sup tlv. */
+ pull_size = sizeof(struct hsr_sup_tlv);
+ skb_pull(skb, pull_size);
+ total_pull_size += pull_size;
+
+ /* Get redbox mac address. */
+ hsr_sp = (struct hsr_sup_payload *)skb->data;
+
+ /* Check if redbox mac and node mac are equal. */
+ if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) {
+ /* This is a redbox supervision frame for a VDAN! */
+ goto done;
+ }
+ }
+
ether_addr_copy(node_real->macaddress_B, ethhdr->h_source);
for (i = 0; i < HSR_PT_PORTS; i++) {
if (!node_curr->time_in_stale[i] &&
@@ -331,11 +373,8 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
kfree_rcu(node_curr, rcu_head);
done:
- /* PRP uses v0 header */
- if (ethhdr->h_proto == htons(ETH_P_HSR))
- skb_push(skb, sizeof(struct hsrv1_ethhdr_sp));
- else
- skb_push(skb, sizeof(struct hsrv0_ethhdr_sp));
+ /* Push back here */
+ skb_push(skb, total_pull_size);
}
/* 'skb' is a frame meant for this host, that is to be passed to upper layers.
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index d9628e7a5f05..bdbb8c822ba1 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -48,8 +48,8 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
void hsr_prune_nodes(struct timer_list *t);
int hsr_create_self_node(struct hsr_priv *hsr,
- unsigned char addr_a[ETH_ALEN],
- unsigned char addr_b[ETH_ALEN]);
+ const unsigned char addr_a[ETH_ALEN],
+ const unsigned char addr_b[ETH_ALEN]);
void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN]);
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index f7e284f23b1f..b099c3150150 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -75,7 +75,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
if (port->type == HSR_PT_SLAVE_A) {
- ether_addr_copy(master->dev->dev_addr, dev->dev_addr);
+ eth_hw_addr_set(master->dev, dev->dev_addr);
call_netdevice_notifiers(NETDEV_CHANGEADDR,
master->dev);
}
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 53d1f7a82463..043e4e9a1694 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -35,13 +35,15 @@
* HSR_NODE_FORGET_TIME?
*/
#define PRUNE_PERIOD 3000 /* ms */
-
+#define HSR_TLV_EOT 0 /* End of TLVs */
#define HSR_TLV_ANNOUNCE 22
#define HSR_TLV_LIFE_CHECK 23
/* PRP V1 life check for Duplicate discard */
#define PRP_TLV_LIFE_CHECK_DD 20
/* PRP V1 life check for Duplicate Accept */
#define PRP_TLV_LIFE_CHECK_DA 21
+/* PRP V1 life redundancy box MAC address */
+#define PRP_TLV_REDBOX_MAC 30
/* HSR Tag.
* As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB,
@@ -94,14 +96,18 @@ struct hsr_vlan_ethhdr {
struct hsr_tag hsr_tag;
} __packed;
+struct hsr_sup_tlv {
+ u8 HSR_TLV_type;
+ u8 HSR_TLV_length;
+};
+
/* HSR/PRP Supervision Frame data types.
* Field names as defined in the IEC:2010 standard for HSR.
*/
struct hsr_sup_tag {
- __be16 path_and_HSR_ver;
- __be16 sequence_nr;
- __u8 HSR_TLV_type;
- __u8 HSR_TLV_length;
+ __be16 path_and_HSR_ver;
+ __be16 sequence_nr;
+ struct hsr_sup_tlv tlv;
} __packed;
struct hsr_sup_payload {
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 3297e7fa9945..2cf62718a282 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -157,7 +157,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
lowpan_802154_dev(ldev)->wdev = wdev;
/* Set the lowpan hardware address to the wpan hardware address. */
- memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN);
+ __dev_addr_set(ldev, wdev->dev_addr, IEEE802154_ADDR_LEN);
/* We need headroom for possible wpan_dev_hard_header call and tailroom
* for encryption/fcs handling. The lowpan interface will replace
* the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1d816a5fd3eb..0189e3cd4a7d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -133,13 +133,9 @@ void inet_sock_destruct(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
__skb_queue_purge(&sk->sk_receive_queue);
- if (sk->sk_rx_skb_cache) {
- __kfree_skb(sk->sk_rx_skb_cache);
- sk->sk_rx_skb_cache = NULL;
- }
__skb_queue_purge(&sk->sk_error_queue);
- sk_mem_reclaim(sk);
+ sk_mem_reclaim_final(sk);
if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
pr_err("Attempt to release TCP socket in state %d %p\n",
@@ -154,7 +150,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk->sk_forward_alloc);
+ WARN_ON(sk_forward_alloc_get(sk));
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
@@ -773,26 +769,28 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
sin->sin_family = AF_INET;
+ lock_sock(sk);
if (peer) {
if (!inet->inet_dport ||
(((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
- peer == 1))
+ peer == 1)) {
+ release_sock(sk);
return -ENOTCONN;
+ }
sin->sin_port = inet->inet_dport;
sin->sin_addr.s_addr = inet->inet_daddr;
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- CGROUP_INET4_GETPEERNAME,
- NULL);
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ CGROUP_INET4_GETPEERNAME);
} else {
__be32 addr = inet->inet_rcv_saddr;
if (!addr)
addr = inet->inet_saddr;
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- CGROUP_INET4_GETSOCKNAME,
- NULL);
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ CGROUP_INET4_GETSOCKNAME);
}
+ release_sock(sk);
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
return sizeof(*sin);
}
@@ -1666,12 +1664,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
}
EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
-u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt)
-{
- return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt);
-}
-EXPORT_SYMBOL_GPL(snmp_get_cpu_field);
-
unsigned long snmp_fold_field(void __percpu *mib, int offt)
{
unsigned long res = 0;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 922dd73e5740..857a144b1ea9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1247,6 +1247,8 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_change_info *change_info;
+ struct in_device *in_dev;
+ bool evict_nocarrier;
switch (event) {
case NETDEV_CHANGEADDR:
@@ -1257,7 +1259,14 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
change_info = ptr;
if (change_info->flags_changed & IFF_NOARP)
neigh_changeaddr(&arp_tbl, dev);
- if (!netif_carrier_ok(dev))
+
+ in_dev = __in_dev_get_rtnl(dev);
+ if (!in_dev)
+ evict_nocarrier = true;
+ else
+ evict_nocarrier = IN_DEV_ARP_EVICT_NOCARRIER(in_dev);
+
+ if (evict_nocarrier && !netif_carrier_ok(dev))
neigh_carrier_down(&arp_tbl, dev);
break;
default:
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 0dcee9df1326..2cf02b4d77fb 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -81,14 +81,7 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
- if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
- return false;
- if (type != BPF_READ)
- return false;
- if (off % size != 0)
- return false;
-
- if (!btf_ctx_access(off, size, type, prog, info))
+ if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info))
return false;
if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id)
@@ -223,41 +216,13 @@ BTF_ID(func, tcp_reno_cong_avoid)
BTF_ID(func, tcp_reno_undo_cwnd)
BTF_ID(func, tcp_slow_start)
BTF_ID(func, tcp_cong_avoid_ai)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
-#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
-BTF_ID(func, cubictcp_init)
-BTF_ID(func, cubictcp_recalc_ssthresh)
-BTF_ID(func, cubictcp_cong_avoid)
-BTF_ID(func, cubictcp_state)
-BTF_ID(func, cubictcp_cwnd_event)
-BTF_ID(func, cubictcp_acked)
-#endif
-#if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP)
-BTF_ID(func, dctcp_init)
-BTF_ID(func, dctcp_update_alpha)
-BTF_ID(func, dctcp_cwnd_event)
-BTF_ID(func, dctcp_ssthresh)
-BTF_ID(func, dctcp_cwnd_undo)
-BTF_ID(func, dctcp_state)
-#endif
-#if IS_BUILTIN(CONFIG_TCP_CONG_BBR)
-BTF_ID(func, bbr_init)
-BTF_ID(func, bbr_main)
-BTF_ID(func, bbr_sndbuf_expand)
-BTF_ID(func, bbr_undo_cwnd)
-BTF_ID(func, bbr_cwnd_event)
-BTF_ID(func, bbr_ssthresh)
-BTF_ID(func, bbr_min_tso_segs)
-BTF_ID(func, bbr_set_state)
-#endif
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_X86 */
BTF_SET_END(bpf_tcp_ca_kfunc_ids)
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id)
+static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
{
- return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id);
+ if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
+ return true;
+ return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
}
static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 099259fc826a..62d5f99760aa 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -73,7 +73,7 @@ struct cipso_v4_map_cache_entry {
static struct cipso_v4_map_cache_bkt *cipso_v4_cache;
/* Restricted bitmap (tag #1) flags */
-int cipso_v4_rbm_optfmt = 0;
+int cipso_v4_rbm_optfmt;
int cipso_v4_rbm_strictvalid = 1;
/*
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 4a8550c49202..48f337ccf949 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -9,7 +9,6 @@
#include <linux/types.h>
#include <linux/module.h>
-#include <linux/ip.h>
#include <linux/in.h>
#include <net/ip.h>
#include <net/sock.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f4468980b675..ec73a0d52d3e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -75,6 +75,7 @@ static struct ipv4_devconf ipv4_devconf = {
[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
+ [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
},
};
@@ -87,6 +88,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
+ [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
},
};
@@ -2532,6 +2534,8 @@ static struct devinet_sysctl_table {
DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
+ "arp_evict_nocarrier"),
DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
"force_igmp_version"),
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index 0c28bd469a68..0e23ade74493 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -6,7 +6,6 @@
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/fib_notifier.h>
-#include <net/netns/ipv4.h>
#include <net/ip_fib.h>
int call_fib4_notifier(struct notifier_block *nb,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b42c429cebbe..3364cb9c67e0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1661,7 +1661,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info);
#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
- int nh_weight, u8 rt_family)
+ int nh_weight, u8 rt_family, u32 nh_tclassid)
{
const struct net_device *dev = nhc->nhc_dev;
struct rtnexthop *rtnh;
@@ -1679,6 +1679,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
rtnh->rtnh_flags = flags;
+ if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid))
+ goto nla_put_failure;
+
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
@@ -1706,14 +1709,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
}
for_nexthops(fi) {
- if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
- AF_INET) < 0)
- goto nla_put_failure;
+ u32 nh_tclassid = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (nh->nh_tclassid &&
- nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
- goto nla_put_failure;
+ nh_tclassid = nh->nh_tclassid;
#endif
+ if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
+ AF_INET, nh_tclassid) < 0)
+ goto nla_put_failure;
} endfor_nexthops(fi);
mp_end:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8b30cadff708..b7e277d8a84d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1054,14 +1054,19 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio);
if (!ext_hdr || !iio)
goto send_mal_query;
- if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr))
+ if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr) ||
+ ntohs(iio->extobj_hdr.length) > sizeof(_iio))
goto send_mal_query;
ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
+ iio = skb_header_pointer(skb, sizeof(_ext_hdr),
+ sizeof(iio->extobj_hdr) + ident_len, &_iio);
+ if (!iio)
+ goto send_mal_query;
+
status = 0;
dev = NULL;
switch (iio->extobj_hdr.class_type) {
case ICMP_EXT_ECHO_CTYPE_NAME:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
if (ident_len >= IFNAMSIZ)
goto send_mal_query;
memset(buff, 0, sizeof(buff));
@@ -1069,30 +1074,24 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
dev = dev_get_by_name(net, buff);
break;
case ICMP_EXT_ECHO_CTYPE_INDEX:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
- sizeof(iio->ident.ifindex), &_iio);
if (ident_len != sizeof(iio->ident.ifindex))
goto send_mal_query;
dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
break;
case ICMP_EXT_ECHO_CTYPE_ADDR:
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
+ if (ident_len < sizeof(iio->ident.addr.ctype3_hdr) ||
+ ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
iio->ident.addr.ctype3_hdr.addrlen)
goto send_mal_query;
switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) {
case ICMP_AFI_IP:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
- sizeof(struct in_addr), &_iio);
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
- sizeof(struct in_addr))
+ if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in_addr))
goto send_mal_query;
dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case ICMP_AFI_IP6:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
- sizeof(struct in6_addr))
+ if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr))
goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
dev_hold(dev);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f25d02ad4a8a..f7fea3a7c5e6 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1015,7 +1015,7 @@ void inet_csk_destroy_sock(struct sock *sk)
sk_refcnt_debug_release(sk);
- percpu_counter_dec(sk->sk_prot->orphan_count);
+ this_cpu_dec(*sk->sk_prot->orphan_count);
sock_put(sk);
}
@@ -1074,7 +1074,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
sock_orphan(child);
- percpu_counter_inc(sk->sk_prot->orphan_count);
+ this_cpu_inc(*sk->sk_prot->orphan_count);
if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ef7897226f08..c8fa6e7f7d12 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -271,7 +271,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct inet_diag_meminfo minfo = {
.idiag_rmem = sk_rmem_alloc_get(sk),
.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
- .idiag_fmem = sk->sk_forward_alloc,
+ .idiag_fmem = sk_forward_alloc_get(sk),
.idiag_tmem = sk_wmem_alloc_get(sk),
};
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 80aeaf9e6e16..75737267746f 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -242,8 +242,10 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
+ score = sk->sk_bound_dev_if ? 2 : 1;
- score = sk->sk_family == PF_INET ? 2 : 1;
+ if (sk->sk_family == PF_INET)
+ score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
@@ -596,7 +598,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ok) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
} else {
- percpu_counter_inc(sk->sk_prot->orphan_count);
+ this_cpu_inc(*sk->sk_prot->orphan_count);
inet_sk_set_state(sk, TCP_CLOSE);
sock_set_flag(sk, SOCK_DEAD);
inet_csk_destroy_sock(sk);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0fe6c936dc54..2ac2b95c5694 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -986,7 +986,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
__gre_tunnel_init(dev);
- memcpy(dev->dev_addr, &iph->saddr, 4);
+ __dev_addr_set(dev, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
dev->flags = IFF_NOARP;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b297bb28556e..38d29b175ca6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -886,6 +886,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
return ip_mc_leave_group(sk, &mreq);
}
+DEFINE_STATIC_KEY_FALSE(ip4_min_ttl);
+
static int do_ip_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1352,7 +1354,14 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
goto e_inval;
if (val < 0 || val > 255)
goto e_inval;
- inet->min_ttl = val;
+
+ if (val)
+ static_branch_enable(&ip4_min_ttl);
+
+ /* tcp_v4_err() and tcp_v4_rcv() might read min_ttl
+ * while we are changint it.
+ */
+ WRITE_ONCE(inet->min_ttl, val);
break;
default:
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe9101d3d69e..5a473319d3a5 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -834,7 +834,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
if (dev->type != ARPHRD_ETHER) {
- memcpy(dev->dev_addr, &p->iph.saddr, 4);
+ __dev_addr_set(dev, &p->iph.saddr, 4);
memcpy(dev->broadcast, &p->iph.daddr, 4);
}
ip_tunnel_add(itn, t);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index efe25a0172e6..8c2bd1d9ddce 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -425,7 +425,7 @@ static int vti_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
- memcpy(dev->dev_addr, &iph->saddr, 4);
+ __dev_addr_set(dev, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
dev->flags = IFF_NOARP;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 816d8aad5a68..9d41d5d5cd1e 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -262,6 +262,11 @@ static int __init ic_open_devs(void)
dev->name, able, d->xid);
}
}
+ /* Devices with a complex topology like SFP ethernet interfaces needs
+ * the rtnl_lock at init. The carrier wait-loop must therefore run
+ * without holding it.
+ */
+ rtnl_unlock();
/* no point in waiting if we could not bring up at least one device */
if (!ic_first_dev)
@@ -274,9 +279,13 @@ static int __init ic_open_devs(void)
msecs_to_jiffies(carrier_timeout * 1000))) {
int wait, elapsed;
+ rtnl_lock();
for_each_netdev(&init_net, dev)
- if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
+ if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) {
+ rtnl_unlock();
goto have_carrier;
+ }
+ rtnl_unlock();
msleep(1);
@@ -289,7 +298,6 @@ static int __init ic_open_devs(void)
next_msg = jiffies + msecs_to_jiffies(20000);
}
have_carrier:
- rtnl_unlock();
*last = NULL;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3aa78ccbec3e..123ea63a04cb 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -380,7 +380,7 @@ static int ipip_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
+ __dev_addr_set(dev, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
tunnel->tun_hlen = 0;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c53f14b94356..ffc0cab7cf18 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -179,10 +179,11 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
return (void *)entry + entry->next_offset;
}
-unsigned int arpt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table)
+unsigned int arpt_do_table(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
+ const struct xt_table *table = priv;
unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
unsigned int verdict = NF_DROP;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 3de78416ec76..78cd5ee24448 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -26,14 +26,6 @@ static const struct xt_table packet_filter = {
.priority = NF_IP_PRI_FILTER,
};
-/* The work comes in here from netfilter.c */
-static unsigned int
-arptable_filter_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return arpt_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *arpfilter_ops __read_mostly;
static int arptable_filter_table_init(struct net *net)
@@ -72,7 +64,7 @@ static int __init arptable_filter_init(void)
if (ret < 0)
return ret;
- arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
+ arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table);
if (IS_ERR(arpfilter_ops)) {
xt_unregister_template(&packet_filter);
return PTR_ERR(arpfilter_ops);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 13acb687c19a..2ed7c58b471a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -222,10 +222,11 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ipt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table)
+ipt_do_table(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
+ const struct xt_table *table = priv;
unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct iphdr *ip;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 0eb0e2ab9bfc..b9062f4552ac 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -28,13 +28,6 @@ static const struct xt_table packet_filter = {
.priority = NF_IP_PRI_FILTER,
};
-static unsigned int
-iptable_filter_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ipt_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
@@ -90,7 +83,7 @@ static int __init iptable_filter_init(void)
if (ret < 0)
return ret;
- filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
+ filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);
if (IS_ERR(filter_ops)) {
xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 40417a3f930b..3abb430af9e6 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -34,7 +34,7 @@ static const struct xt_table packet_mangler = {
};
static unsigned int
-ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
+ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
unsigned int ret;
const struct iphdr *iph;
@@ -50,7 +50,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pri
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(skb, state, priv);
+ ret = ipt_do_table(priv, skb, state);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN) {
iph = ip_hdr(skb);
@@ -75,8 +75,8 @@ iptable_mangle_hook(void *priv,
const struct nf_hook_state *state)
{
if (state->hook == NF_INET_LOCAL_OUT)
- return ipt_mangle_out(skb, state, priv);
- return ipt_do_table(skb, state, priv);
+ return ipt_mangle_out(priv, skb, state);
+ return ipt_do_table(priv, skb, state);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 45d7e072e6a5..56f6ecc43451 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -29,34 +29,27 @@ static const struct xt_table nf_nat_ipv4_table = {
.af = NFPROTO_IPV4,
};
-static unsigned int iptable_nat_do_chain(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ipt_do_table(skb, state, priv);
-}
-
static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
- .hook = iptable_nat_do_chain,
+ .hook = ipt_do_table,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
{
- .hook = iptable_nat_do_chain,
+ .hook = ipt_do_table,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
{
- .hook = iptable_nat_do_chain,
+ .hook = ipt_do_table,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
},
{
- .hook = iptable_nat_do_chain,
+ .hook = ipt_do_table,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index b88e0f36cd05..ca5e5b21587c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -32,17 +32,9 @@ static const struct xt_table packet_raw_before_defrag = {
.priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-iptable_raw_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ipt_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *rawtable_ops __read_mostly;
-static int __net_init iptable_raw_table_init(struct net *net)
+static int iptable_raw_table_init(struct net *net)
{
struct ipt_replace *repl;
const struct xt_table *table = &packet_raw;
@@ -90,7 +82,7 @@ static int __init iptable_raw_init(void)
if (ret < 0)
return ret;
- rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table);
if (IS_ERR(rawtable_ops)) {
xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index f519162a2fa5..d885443cb267 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -33,13 +33,6 @@ static const struct xt_table security_table = {
.priority = NF_IP_PRI_SECURITY,
};
-static unsigned int
-iptable_security_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ipt_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *sectbl_ops __read_mostly;
static int iptable_security_table_init(struct net *net)
@@ -78,7 +71,7 @@ static int __init iptable_security_init(void)
if (ret < 0)
return ret;
- sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
+ sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table);
if (IS_ERR(sectbl_ops)) {
xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 613432a36f0a..e61ea428ea18 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -20,13 +20,8 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-static unsigned int defrag4_pernet_id __read_mostly;
static DEFINE_MUTEX(defrag4_mutex);
-struct defrag4_pernet {
- unsigned int users;
-};
-
static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
u_int32_t user)
{
@@ -111,19 +106,15 @@ static const struct nf_hook_ops ipv4_defrag_ops[] = {
static void __net_exit defrag4_net_exit(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
-
- if (nf_defrag->users) {
+ if (net->nf.defrag_ipv4_users) {
nf_unregister_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
- nf_defrag->users = 0;
+ net->nf.defrag_ipv4_users = 0;
}
}
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
- .id = &defrag4_pernet_id,
- .size = sizeof(struct defrag4_pernet),
};
static int __init nf_defrag_init(void)
@@ -138,24 +129,23 @@ static void __exit nf_defrag_fini(void)
int nf_defrag_ipv4_enable(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
int err = 0;
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users == UINT_MAX) {
+ if (net->nf.defrag_ipv4_users == UINT_MAX) {
err = -EOVERFLOW;
goto out_unlock;
}
- if (nf_defrag->users) {
- nf_defrag->users++;
+ if (net->nf.defrag_ipv4_users) {
+ net->nf.defrag_ipv4_users++;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
if (err == 0)
- nf_defrag->users = 1;
+ net->nf.defrag_ipv4_users = 1;
out_unlock:
mutex_unlock(&defrag4_mutex);
@@ -165,12 +155,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
void nf_defrag_ipv4_disable(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
-
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users) {
- nf_defrag->users--;
- if (nf_defrag->users == 0)
+ if (net->nf.defrag_ipv4_users) {
+ net->nf.defrag_ipv4_users--;
+ if (net->nf.defrag_ipv4_users == 0)
nf_unregister_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
}
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 75ca4b6e484f..9e8100728d46 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1982,6 +1982,8 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
rcu_assign_pointer(old->nh_grp, newg);
if (newg->resilient) {
+ /* Make sure concurrent readers are not using 'oldg' anymore. */
+ synchronize_net();
rcu_assign_pointer(oldg->res_table, tmp_table);
rcu_assign_pointer(oldg->spare->res_table, tmp_table);
}
@@ -3565,6 +3567,7 @@ static struct notifier_block nh_netdev_notifier = {
};
static int nexthops_dump(struct net *net, struct notifier_block *nb,
+ enum nexthop_event_type event_type,
struct netlink_ext_ack *extack)
{
struct rb_root *root = &net->nexthop.rb_root;
@@ -3575,8 +3578,7 @@ static int nexthops_dump(struct net *net, struct notifier_block *nb,
struct nexthop *nh;
nh = rb_entry(node, struct nexthop, rb_node);
- err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
- extack);
+ err = call_nexthop_notifier(nb, net, event_type, nh, extack);
if (err)
break;
}
@@ -3590,7 +3592,7 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
int err;
rtnl_lock();
- err = nexthops_dump(net, nb, extack);
+ err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack);
if (err)
goto unlock;
err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
@@ -3603,8 +3605,17 @@ EXPORT_SYMBOL(register_nexthop_notifier);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
- return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
- nb);
+ int err;
+
+ rtnl_lock();
+ err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
+ nb);
+ if (err)
+ goto unlock;
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+unlock:
+ rtnl_unlock();
+ return err;
}
EXPORT_SYMBOL(unregister_nexthop_notifier);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b0d3a09dc84e..f30273afb539 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -53,7 +53,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
struct net *net = seq->private;
int orphans, sockets;
- orphans = percpu_counter_sum_positive(&tcp_orphan_count);
+ orphans = tcp_orphan_count_sum();
sockets = proto_sockets_allocated_sum_positive(&tcp_prot);
socket_seq_show(seq);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6899ab5fb39..0b4103b1e622 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -61,15 +61,11 @@
#define pr_fmt(fmt) "IPv4: " fmt
#include <linux/module.h>
-#include <linux/uaccess.h>
#include <linux/bitops.h>
-#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/memblock.h>
-#include <linux/string.h>
#include <linux/socket.h>
-#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
@@ -84,20 +80,17 @@
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
-#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
-#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
-#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
@@ -109,7 +102,6 @@
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
-#include <net/l3mdev.h>
#include "fib_lookup.h"
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 33792cf55a79..8696dc343ad2 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -7,8 +7,6 @@
*/
#include <linux/tcp.h>
-#include <linux/slab.h>
-#include <linux/random.h>
#include <linux/siphash.h>
#include <linux/kernel.h>
#include <linux/export.h>
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6f1e64d49232..97eb54774924 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -6,25 +6,16 @@
* Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
*/
-#include <linux/mm.h>
-#include <linux/module.h>
#include <linux/sysctl.h>
-#include <linux/igmp.h>
-#include <linux/inetdevice.h>
#include <linux/seqlock.h>
#include <linux/init.h>
#include <linux/slab.h>
-#include <linux/nsproxy.h>
-#include <linux/swap.h>
-#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ip_fib.h>
-#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/cipso_ipv4.h>
-#include <net/inet_frag.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/netevent.h>
@@ -594,18 +585,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &sysctl_fib_sync_mem_min,
.extra2 = &sysctl_fib_sync_mem_max,
},
- {
- .procname = "tcp_rx_skb_cache",
- .data = &tcp_rx_skb_cache_key.key,
- .mode = 0644,
- .proc_handler = proc_do_static_key,
- },
- {
- .procname = "tcp_tx_skb_cache",
- .data = &tcp_tx_skb_cache_key.key,
- .mode = 0644,
- .proc_handler = proc_do_static_key,
- },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e8b48df73c85..b7796b4cf0a0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -260,7 +260,6 @@
#include <linux/random.h>
#include <linux/memblock.h>
#include <linux/highmem.h>
-#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/time.h>
@@ -287,8 +286,8 @@ enum {
TCP_CMSG_TS = 2
};
-struct percpu_counter tcp_orphan_count;
-EXPORT_SYMBOL_GPL(tcp_orphan_count);
+DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
+EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);
@@ -325,11 +324,6 @@ struct tcp_splice_state {
unsigned long tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL_GPL(tcp_memory_pressure);
-DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
-EXPORT_SYMBOL(tcp_rx_skb_cache_key);
-
-DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
-
void tcp_enter_memory_pressure(struct sock *sk)
{
unsigned long val;
@@ -486,10 +480,7 @@ static bool tcp_stream_is_readable(struct sock *sk, int target)
{
if (tcp_epollin_ready(sk, target))
return true;
-
- if (sk->sk_prot->stream_memory_read)
- return sk->sk_prot->stream_memory_read(sk);
- return false;
+ return sk_is_readable(sk);
}
/*
@@ -647,7 +638,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
EXPORT_SYMBOL(tcp_ioctl);
-static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
+void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
tp->pushed_seq = tp->write_seq;
@@ -658,15 +649,13 @@ static inline bool forced_push(const struct tcp_sock *tp)
return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}
-static void skb_entail(struct sock *sk, struct sk_buff *skb)
+void tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
- skb->csum = 0;
tcb->seq = tcb->end_seq = tp->write_seq;
tcb->tcp_flags = TCPHDR_ACK;
- tcb->sacked = 0;
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
sk_wmem_queued_add(sk, skb->truesize);
@@ -861,33 +850,19 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
EXPORT_SYMBOL(tcp_splice_read);
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
- bool force_schedule)
+struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+ bool force_schedule)
{
struct sk_buff *skb;
- if (likely(!size)) {
- skb = sk->sk_tx_skb_cache;
- if (skb) {
- skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
- sk->sk_tx_skb_cache = NULL;
- pskb_trim(skb, 0);
- INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
- skb_shinfo(skb)->tx_flags = 0;
- memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb));
- return skb;
- }
- }
- /* The TCP header must be at least 32-bit aligned. */
- size = ALIGN(size, 4);
-
if (unlikely(tcp_under_memory_pressure(sk)))
sk_mem_reclaim_partial(sk);
- skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
+ skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
if (likely(skb)) {
bool mem_scheduled;
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
if (force_schedule) {
mem_scheduled = true;
sk_forced_mem_schedule(sk, skb->truesize);
@@ -895,12 +870,8 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
}
if (likely(mem_scheduled)) {
- skb_reserve(skb, sk->sk_prot->max_header);
- /*
- * Make sure that we have exactly size bytes
- * available to the caller, no more, no less.
- */
- skb->reserved_tailroom = skb->end - skb->tail - size;
+ skb_reserve(skb, MAX_TCP_HEADER);
+ skb->ip_summed = CHECKSUM_PARTIAL;
INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
return skb;
}
@@ -953,18 +924,20 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
* importantly be able to generate EPOLLOUT for Edge Trigger epoll()
* users.
*/
-void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
+void tcp_remove_empty_skb(struct sock *sk)
{
- if (skb && !skb->len) {
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
+
+ if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
tcp_unlink_write_queue(skb, sk);
if (tcp_write_queue_empty(sk))
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
}
}
-struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
- struct page *page, int offset, size_t *size)
+static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
+ struct page *page, int offset, size_t *size)
{
struct sk_buff *skb = tcp_write_queue_tail(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -977,15 +950,15 @@ new_segment:
if (!sk_stream_memory_free(sk))
return NULL;
- skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
- tcp_rtx_and_write_queues_empty(sk));
+ skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
+ tcp_rtx_and_write_queues_empty(sk));
if (!skb)
return NULL;
#ifdef CONFIG_TLS_DEVICE
skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
#endif
- skb_entail(sk, skb);
+ tcp_skb_entail(sk, skb);
copy = size_goal;
}
@@ -1016,7 +989,6 @@ new_segment:
skb->truesize += copy;
sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
- skb->ip_summed = CHECKSUM_PARTIAL;
WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@ -1107,7 +1079,7 @@ out:
return copied;
do_error:
- tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
+ tcp_remove_empty_skb(sk);
if (copied)
goto out;
out_err:
@@ -1306,15 +1278,14 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
- first_skb);
+ skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
+ first_skb);
if (!skb)
goto wait_for_space;
process_backlog++;
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb_entail(sk, skb);
+ tcp_skb_entail(sk, skb);
copy = size_goal;
/* All packets are restored as if they have
@@ -1329,14 +1300,7 @@ new_segment:
if (copy > msg_data_left(msg))
copy = msg_data_left(msg);
- /* Where to copy to? */
- if (skb_availroom(skb) > 0 && !zc) {
- /* We have some space in skb head. Superb! */
- copy = min_t(int, copy, skb_availroom(skb));
- err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
- if (err)
- goto do_fault;
- } else if (!zc) {
+ if (!zc) {
bool merge = true;
int i = skb_shinfo(skb)->nr_frags;
struct page_frag *pfrag = sk_page_frag(sk);
@@ -1355,6 +1319,15 @@ new_segment:
copy = min_t(int, copy, pfrag->size - pfrag->offset);
+ /* skb changing from pure zc to mixed, must charge zc */
+ if (unlikely(skb_zcopy_pure(skb))) {
+ if (!sk_wmem_schedule(sk, skb->data_len))
+ goto wait_for_space;
+
+ sk_mem_charge(sk, skb->data_len);
+ skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ }
+
if (!sk_wmem_schedule(sk, copy))
goto wait_for_space;
@@ -1375,8 +1348,16 @@ new_segment:
}
pfrag->offset += copy;
} else {
- if (!sk_wmem_schedule(sk, copy))
- goto wait_for_space;
+ /* First append to a fragless skb builds initial
+ * pure zerocopy skb
+ */
+ if (!skb->len)
+ skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
+
+ if (!skb_zcopy_pure(skb)) {
+ if (!sk_wmem_schedule(sk, copy))
+ goto wait_for_space;
+ }
err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
if (err == -EMSGSIZE || err == -EEXIST) {
@@ -1435,9 +1416,7 @@ out_nopush:
return copied + copied_syn;
do_error:
- skb = tcp_write_queue_tail(sk);
-do_fault:
- tcp_remove_empty_skb(sk, skb);
+ tcp_remove_empty_skb(sk);
if (copied + copied_syn)
goto out;
@@ -2690,11 +2669,36 @@ void tcp_shutdown(struct sock *sk, int how)
}
EXPORT_SYMBOL(tcp_shutdown);
+int tcp_orphan_count_sum(void)
+{
+ int i, total = 0;
+
+ for_each_possible_cpu(i)
+ total += per_cpu(tcp_orphan_count, i);
+
+ return max(total, 0);
+}
+
+static int tcp_orphan_cache;
+static struct timer_list tcp_orphan_timer;
+#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100)
+
+static void tcp_orphan_update(struct timer_list *unused)
+{
+ WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum());
+ mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
+}
+
+static bool tcp_too_many_orphans(int shift)
+{
+ return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans;
+}
+
bool tcp_check_oom(struct sock *sk, int shift)
{
bool too_many_orphans, out_of_socket_memory;
- too_many_orphans = tcp_too_many_orphans(sk, shift);
+ too_many_orphans = tcp_too_many_orphans(shift);
out_of_socket_memory = tcp_out_of_memory(sk);
if (too_many_orphans)
@@ -2803,7 +2807,7 @@ adjudge_to_death:
/* remove backlog if any, without releasing ownership. */
__release_sock(sk);
- percpu_counter_inc(sk->sk_prot->orphan_count);
+ this_cpu_inc(tcp_orphan_count);
/* Have we already been destroyed by a softirq or backlog? */
if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
@@ -2906,7 +2910,7 @@ static void tcp_rtx_queue_purge(struct sock *sk)
* list_del(&skb->tcp_tsorted_anchor)
*/
tcp_rtx_queue_unlink(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
}
}
@@ -2917,14 +2921,9 @@ void tcp_write_queue_purge(struct sock *sk)
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
tcp_skb_tsorted_anchor_cleanup(skb);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
}
tcp_rtx_queue_purge(sk);
- skb = sk->sk_tx_skb_cache;
- if (skb) {
- __kfree_skb(skb);
- sk->sk_tx_skb_cache = NULL;
- }
INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
sk_mem_reclaim(sk);
tcp_clear_all_retrans_hints(tcp_sk(sk));
@@ -2961,10 +2960,6 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
- if (sk->sk_rx_skb_cache) {
- __kfree_skb(sk->sk_rx_skb_cache);
- sk->sk_rx_skb_cache = NULL;
- }
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->urg_data = 0;
tcp_write_queue_purge(sk);
@@ -4505,7 +4500,10 @@ void __init tcp_init(void)
sizeof_field(struct sk_buff, cb));
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
- percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
+
+ timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
+ mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
+
inet_hashinfo_init(&tcp_hashinfo);
inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
thash_entries, 21, /* one slot per 2 MB*/
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 6274462b86b4..ec5550089b4d 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -56,6 +56,8 @@
* otherwise TCP stack falls back to an internal pacing using one high
* resolution timer per TCP socket and may use more resources.
*/
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
#include <linux/module.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>
@@ -1152,14 +1154,38 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.set_state = bbr_set_state,
};
+BTF_SET_START(tcp_bbr_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, bbr_init)
+BTF_ID(func, bbr_main)
+BTF_ID(func, bbr_sndbuf_expand)
+BTF_ID(func, bbr_undo_cwnd)
+BTF_ID(func, bbr_cwnd_event)
+BTF_ID(func, bbr_ssthresh)
+BTF_ID(func, bbr_min_tso_segs)
+BTF_ID(func, bbr_set_state)
+#endif
+#endif
+BTF_SET_END(tcp_bbr_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+
static int __init bbr_register(void)
{
+ int ret;
+
BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
- return tcp_register_congestion_control(&tcp_bbr_cong_ops);
+ ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
+ if (ret)
+ return ret;
+ register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
+ return 0;
}
static void __exit bbr_unregister(void)
{
+ unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index d3e9386b493e..f70aa0932bd6 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -150,19 +150,6 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
#ifdef CONFIG_BPF_SYSCALL
-static bool tcp_bpf_stream_read(const struct sock *sk)
-{
- struct sk_psock *psock;
- bool empty = true;
-
- rcu_read_lock();
- psock = sk_psock(sk);
- if (likely(psock))
- empty = list_empty(&psock->ingress_msg);
- rcu_read_unlock();
- return !empty;
-}
-
static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
long timeo)
{
@@ -185,6 +172,41 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
return ret;
}
+static int tcp_bpf_recvmsg_parser(struct sock *sk,
+ struct msghdr *msg,
+ size_t len,
+ int nonblock,
+ int flags,
+ int *addr_len)
+{
+ struct sk_psock *psock;
+ int copied;
+
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+ lock_sock(sk);
+msg_bytes_ready:
+ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ if (!copied) {
+ long timeo;
+ int data;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = tcp_msg_wait_data(sk, psock, timeo);
+ if (data && !sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ copied = -EAGAIN;
+ }
+ release_sock(sk);
+ sk_psock_put(sk, psock);
+ return copied;
+}
+
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -232,6 +254,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
bool cork = false, enospc = sk_msg_full(msg);
struct sock *sk_redir;
u32 tosend, delta = 0;
+ u32 eval = __SK_NONE;
int ret;
more_data:
@@ -275,13 +298,24 @@ more_data:
case __SK_REDIRECT:
sk_redir = psock->sk_redir;
sk_msg_apply_bytes(psock, tosend);
+ if (!psock->apply_bytes) {
+ /* Clean up before releasing the sock lock. */
+ eval = psock->eval;
+ psock->eval = __SK_NONE;
+ psock->sk_redir = NULL;
+ }
if (psock->cork) {
cork = true;
psock->cork = NULL;
}
sk_msg_return(sk, msg, tosend);
release_sock(sk);
+
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+
+ if (eval == __SK_REDIRECT)
+ sock_put(sk_redir);
+
lock_sock(sk);
if (unlikely(ret < 0)) {
int free = sk_msg_free_nocharge(sk, msg);
@@ -465,6 +499,8 @@ enum {
enum {
TCP_BPF_BASE,
TCP_BPF_TX,
+ TCP_BPF_RX,
+ TCP_BPF_TXRX,
TCP_BPF_NUM_CFGS,
};
@@ -476,14 +512,19 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
struct proto *base)
{
prot[TCP_BPF_BASE] = *base;
- prot[TCP_BPF_BASE].unhash = sock_map_unhash;
prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
- prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
+ prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage;
+
+ prot[TCP_BPF_RX] = prot[TCP_BPF_BASE];
+ prot[TCP_BPF_RX].recvmsg = tcp_bpf_recvmsg_parser;
+
+ prot[TCP_BPF_TXRX] = prot[TCP_BPF_TX];
+ prot[TCP_BPF_TXRX].recvmsg = tcp_bpf_recvmsg_parser;
}
static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops)
@@ -521,6 +562,10 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
+ if (psock->progs.stream_verdict || psock->progs.skb_verdict) {
+ config = (config == TCP_BPF_TX) ? TCP_BPF_TXRX : TCP_BPF_RX;
+ }
+
if (restore) {
if (inet_csk_has_ulp(sk)) {
/* TLS does not have an unhash proto in SW cases,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 4a30deaa9a37..5e9d9c51164c 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -25,6 +25,8 @@
*/
#include <linux/mm.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <net/tcp.h>
@@ -482,8 +484,25 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.name = "cubic",
};
+BTF_SET_START(tcp_cubic_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, cubictcp_init)
+BTF_ID(func, cubictcp_recalc_ssthresh)
+BTF_ID(func, cubictcp_cong_avoid)
+BTF_ID(func, cubictcp_state)
+BTF_ID(func, cubictcp_cwnd_event)
+BTF_ID(func, cubictcp_acked)
+#endif
+#endif
+BTF_SET_END(tcp_cubic_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+
static int __init cubictcp_register(void)
{
+ int ret;
+
BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
/* Precompute a bunch of the scaling factors that are used per-packet
@@ -514,11 +533,16 @@ static int __init cubictcp_register(void)
/* divide by bic_scale and by constant Srtt (100ms) */
do_div(cube_factor, bic_scale * 10);
- return tcp_register_congestion_control(&cubictcp);
+ ret = tcp_register_congestion_control(&cubictcp);
+ if (ret)
+ return ret;
+ register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
+ return 0;
}
static void __exit cubictcp_unregister(void)
{
+ unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
tcp_unregister_congestion_control(&cubictcp);
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 79f705450c16..0d7ab3cc7b61 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -36,6 +36,8 @@
* Glenn Judd <glenn.judd@morganstanley.com>
*/
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <net/tcp.h>
@@ -236,14 +238,36 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
.name = "dctcp-reno",
};
+BTF_SET_START(tcp_dctcp_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, dctcp_init)
+BTF_ID(func, dctcp_update_alpha)
+BTF_ID(func, dctcp_cwnd_event)
+BTF_ID(func, dctcp_ssthresh)
+BTF_ID(func, dctcp_cwnd_undo)
+BTF_ID(func, dctcp_state)
+#endif
+#endif
+BTF_SET_END(tcp_dctcp_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+
static int __init dctcp_register(void)
{
+ int ret;
+
BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
- return tcp_register_congestion_control(&dctcp);
+ ret = tcp_register_congestion_control(&dctcp);
+ if (ret)
+ return ret;
+ register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
+ return 0;
}
static void __exit dctcp_unregister(void)
{
+ unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
tcp_unregister_congestion_control(&dctcp);
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 59412d6354a0..fdbcf2a6d08e 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,13 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/rcupdate.h>
-#include <linux/rculist.h>
-#include <net/inetpeer.h>
#include <net/tcp.h>
void tcp_fastopen_init_key_once(struct net *net)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f7bd7ae7d7a..246ab7b5e857 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -500,8 +500,11 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
+ if (room <= 0)
+ return;
+
/* Check #1 */
- if (room > 0 && !tcp_under_memory_pressure(sk)) {
+ if (!tcp_under_memory_pressure(sk)) {
unsigned int truesize = truesize_adjust(adjust, skb);
int incr;
@@ -518,6 +521,11 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
tp->rcv_ssthresh += min(room, incr);
inet_csk(sk)->icsk_ack.quick |= 1;
}
+ } else {
+ /* Under pressure:
+ * Adjust rcv_ssthresh according to reserved mem
+ */
+ tcp_adjust_rcv_ssthresh(sk);
}
}
@@ -1346,7 +1354,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
- tp->undo_retrans--;
+ tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
if ((sacked & TCPCB_SACKED_ACKED) &&
before(start_seq, state->reord))
state->reord = start_seq;
@@ -3221,7 +3229,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
u32 pkts_acked = 0;
- u32 last_in_flight = 0;
bool rtt_update;
int flag = 0;
@@ -3257,7 +3264,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
if (!first_ackt)
first_ackt = last_ackt;
- last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
if (before(start_seq, reord))
reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
@@ -3323,8 +3329,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
- if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
- last_in_flight && !prior_sacked && fully_acked &&
+ if (pkts_acked == 1 && fully_acked && !prior_sacked &&
+ (tp->snd_una - prior_snd_una) < tp->mss_cache &&
sack->rate->prior_delivered + 1 == tp->delivered &&
!(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
/* Conservatively mark a delayed ACK. It's typically
@@ -3381,9 +3387,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
if (icsk->icsk_ca_ops->pkts_acked) {
struct ack_sample sample = { .pkts_acked = pkts_acked,
- .rtt_us = sack->rate->rtt_us,
- .in_flight = last_in_flight };
+ .rtt_us = sack->rate->rtt_us };
+ sample.in_flight = tp->mss_cache *
+ (tp->delivered - sack->rate->prior_delivered);
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
}
@@ -5346,7 +5353,7 @@ static int tcp_prune_queue(struct sock *sk)
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
else if (tcp_under_memory_pressure(sk))
- tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+ tcp_adjust_rcv_ssthresh(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
@@ -5381,7 +5388,7 @@ static int tcp_prune_queue(struct sock *sk)
return -1;
}
-static bool tcp_should_expand_sndbuf(const struct sock *sk)
+static bool tcp_should_expand_sndbuf(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -5392,8 +5399,18 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
return false;
/* If we are under global TCP memory pressure, do not expand. */
- if (tcp_under_memory_pressure(sk))
+ if (tcp_under_memory_pressure(sk)) {
+ int unused_mem = sk_unused_reserved_mem(sk);
+
+ /* Adjust sndbuf according to reserved mem. But make sure
+ * it never goes below SOCK_MIN_SNDBUF.
+ * See sk_stream_moderate_sndbuf() for more details.
+ */
+ if (unused_mem > SOCK_MIN_SNDBUF)
+ WRITE_ONCE(sk->sk_sndbuf, unused_mem);
+
return false;
+ }
/* If we are under soft global TCP memory pressure, do not expand. */
if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e62e0d6373a..13d868c43284 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -508,9 +508,12 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
if (sk->sk_state == TCP_CLOSE)
goto out;
- if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
- __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
- goto out;
+ if (static_branch_unlikely(&ip4_min_ttl)) {
+ /* min_ttl can be changed concurrently from do_ip_setsockopt() */
+ if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ goto out;
+ }
}
tp = tcp_sk(sk);
@@ -1037,6 +1040,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);
+static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new)
+{
+ if (!old)
+ return true;
+
+ /* l3index always overrides non-l3index */
+ if (old->l3index && new->l3index == 0)
+ return false;
+ if (old->l3index == 0 && new->l3index)
+ return true;
+
+ return old->prefixlen < new->prefixlen;
+}
+
/* Find the Key structure for an address. */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
@@ -1059,7 +1076,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
lockdep_sock_is_held(sk)) {
if (key->family != family)
continue;
- if (key->l3index && key->l3index != l3index)
+ if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index)
continue;
if (family == AF_INET) {
mask = inet_make_mask(key->prefixlen);
@@ -1074,8 +1091,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
match = false;
}
- if (match && (!best_match ||
- key->prefixlen > best_match->prefixlen))
+ if (match && better_md5_match(best_match, key))
best_match = key;
}
return best_match;
@@ -1085,7 +1101,7 @@ EXPORT_SYMBOL(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
int family, u8 prefixlen,
- int l3index)
+ int l3index, u8 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1105,7 +1121,9 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
lockdep_sock_is_held(sk)) {
if (key->family != family)
continue;
- if (key->l3index && key->l3index != l3index)
+ if ((key->flags & TCP_MD5SIG_FLAG_IFINDEX) != (flags & TCP_MD5SIG_FLAG_IFINDEX))
+ continue;
+ if (key->l3index != l3index)
continue;
if (!memcmp(&key->addr, addr, size) &&
key->prefixlen == prefixlen)
@@ -1129,7 +1147,7 @@ EXPORT_SYMBOL(tcp_v4_md5_lookup);
/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index,
+ int family, u8 prefixlen, int l3index, u8 flags,
const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
@@ -1137,7 +1155,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags);
if (key) {
/* Pre-existing entry - just update that one.
* Note that the key might be used concurrently.
@@ -1182,6 +1200,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key->family = family;
key->prefixlen = prefixlen;
key->l3index = l3index;
+ key->flags = flags;
memcpy(&key->addr, addr,
(family == AF_INET6) ? sizeof(struct in6_addr) :
sizeof(struct in_addr));
@@ -1191,11 +1210,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
- u8 prefixlen, int l3index)
+ u8 prefixlen, int l3index, u8 flags)
{
struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
@@ -1229,6 +1248,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
const union tcp_md5_addr *addr;
u8 prefixlen = 32;
int l3index = 0;
+ u8 flags;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -1239,6 +1259,8 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
if (sin->sin_family != AF_INET)
return -EINVAL;
+ flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
@@ -1246,7 +1268,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
}
- if (optname == TCP_MD5SIG_EXT &&
+ if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
struct net_device *dev;
@@ -1267,12 +1289,12 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr;
if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index);
+ return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index, flags);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index,
+ return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
@@ -1596,7 +1618,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
* memory, then we end up not copying the key
* across. Shucks.
*/
- tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index,
+ tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, key->flags,
key->key, key->keylen, GFP_ATOMIC);
sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
@@ -1684,7 +1706,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) {
- if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+ if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
!INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check,
dst, 0)) {
dst_release(dst);
@@ -1769,7 +1791,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
if (dst)
dst = dst_check(dst, 0);
if (dst &&
- inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+ sk->sk_rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
@@ -1941,7 +1963,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
- struct sk_buff *skb_to_free;
int sdif = inet_sdif(skb);
int dif = inet_iif(skb);
const struct iphdr *iph;
@@ -2050,9 +2071,13 @@ process:
return 0;
}
}
- if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
- __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
- goto discard_and_relse;
+
+ if (static_branch_unlikely(&ip4_min_ttl)) {
+ /* min_ttl can be changed concurrently from do_ip_setsockopt() */
+ if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ goto discard_and_relse;
+ }
}
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -2082,17 +2107,12 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- skb_to_free = sk->sk_rx_skb_cache;
- sk->sk_rx_skb_cache = NULL;
ret = tcp_v4_do_rcv(sk, skb);
} else {
if (tcp_add_backlog(sk, skb))
goto discard_and_relse;
- skb_to_free = NULL;
}
bh_unlock_sock(sk);
- if (skb_to_free)
- __kfree_skb(skb_to_free);
put_and_return:
if (refcounted)
@@ -2182,7 +2202,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
if (dst && dst_hold_safe(dst)) {
sk->sk_rx_dst = dst;
- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ sk->sk_rx_dst_ifindex = skb->skb_iif;
}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0a4f3f16140a..cf913a66df17 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -19,14 +19,7 @@
* Jorge Cwik, <jorge@laser.satlink.net>
*/
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysctl.h>
-#include <linux/workqueue.h>
-#include <linux/static_key.h>
#include <net/tcp.h>
-#include <net/inet_common.h>
#include <net/xfrm.h>
#include <net/busy_poll.h>
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 95db7a11ba2a..ab552356bdba 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -25,7 +25,6 @@
* 1) Add mechanism to deal with reverse congestion.
*/
-#include <linux/mm.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <net/tcp.h>
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6d72f3ea48c4..2e6e5a70168e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -394,7 +394,6 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
skb->ip_summed = CHECKSUM_PARTIAL;
TCP_SKB_CB(skb)->tcp_flags = flags;
- TCP_SKB_CB(skb)->sacked = 0;
tcp_skb_pcount_set(skb, 1);
@@ -409,13 +408,13 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
return tp->snd_una != tp->snd_up;
}
-#define OPTION_SACK_ADVERTISE (1 << 0)
-#define OPTION_TS (1 << 1)
-#define OPTION_MD5 (1 << 2)
-#define OPTION_WSCALE (1 << 3)
-#define OPTION_FAST_OPEN_COOKIE (1 << 8)
-#define OPTION_SMC (1 << 9)
-#define OPTION_MPTCP (1 << 10)
+#define OPTION_SACK_ADVERTISE BIT(0)
+#define OPTION_TS BIT(1)
+#define OPTION_MD5 BIT(2)
+#define OPTION_WSCALE BIT(3)
+#define OPTION_FAST_OPEN_COOKIE BIT(8)
+#define OPTION_SMC BIT(9)
+#define OPTION_MPTCP BIT(10)
static void smc_options_write(__be32 *ptr, u16 *options)
{
@@ -1256,8 +1255,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (clone_it) {
- TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- - tp->snd_una;
oskb = skb;
tcp_skb_tsorted_save(oskb) {
@@ -1562,11 +1559,11 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
return -ENOMEM;
}
- if (skb_unclone(skb, gfp))
+ if (skb_unclone_keeptruesize(skb, gfp))
return -ENOMEM;
/* Get a new skb... force flag on. */
- buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
+ buff = tcp_stream_alloc_skb(sk, nsize, gfp, true);
if (!buff)
return -ENOMEM; /* We'll just try again later. */
skb_copy_decrypted(buff, skb);
@@ -1592,8 +1589,6 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
skb_split(skb, buff, len);
- buff->ip_summed = CHECKSUM_PARTIAL;
-
buff->tstamp = skb->tstamp;
tcp_fragment_tstamp(skb, buff);
@@ -1672,18 +1667,18 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
u32 delta_truesize;
- if (skb_unclone(skb, GFP_ATOMIC))
+ if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
return -ENOMEM;
delta_truesize = __pskb_trim_head(skb, len);
TCP_SKB_CB(skb)->seq += len;
- skb->ip_summed = CHECKSUM_PARTIAL;
if (delta_truesize) {
skb->truesize -= delta_truesize;
sk_wmem_queued_add(sk, -delta_truesize);
- sk_mem_uncharge(sk, delta_truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_uncharge(sk, delta_truesize);
}
/* Any change of skb->len requires recalculation of tso factor. */
@@ -2123,7 +2118,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
skb, len, mss_now, gfp);
- buff = sk_stream_alloc_skb(sk, 0, gfp, true);
+ buff = tcp_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
return -ENOMEM;
skb_copy_decrypted(buff, skb);
@@ -2144,12 +2139,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
TCP_SKB_CB(buff)->tcp_flags = flags;
- /* This packet was never sent out yet, so no SACK bits. */
- TCP_SKB_CB(buff)->sacked = 0;
-
tcp_skb_fragment_eor(skb, buff);
- buff->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
@@ -2305,7 +2296,9 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
if (len <= skb->len)
break;
- if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
+ if (unlikely(TCP_SKB_CB(skb)->eor) ||
+ tcp_has_tx_tstamp(skb) ||
+ !skb_pure_zcopy_same(skb, next))
return false;
len -= skb->len;
@@ -2390,7 +2383,7 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
/* We're allowed to probe. Build it now. */
- nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
+ nskb = tcp_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
return -1;
sk_wmem_queued_add(sk, nskb->truesize);
@@ -2403,9 +2396,6 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
- TCP_SKB_CB(nskb)->sacked = 0;
- nskb->csum = 0;
- nskb->ip_summed = CHECKSUM_PARTIAL;
tcp_insert_write_queue_before(nskb, skb, sk);
tcp_highest_sack_replace(sk, skb, nskb);
@@ -2425,7 +2415,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
tcp_skb_collapse_tstamp(nskb, skb);
tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
} else {
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH);
@@ -2969,8 +2959,7 @@ u32 __tcp_select_window(struct sock *sk)
icsk->icsk_ack.quick = 0;
if (tcp_under_memory_pressure(sk))
- tp->rcv_ssthresh = min(tp->rcv_ssthresh,
- 4U * tp->advmss);
+ tcp_adjust_rcv_ssthresh(sk);
/* free_space might become our new window, make sure we don't
* increase it due to wscale.
@@ -3048,13 +3037,9 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
- if (next_skb_size) {
- if (next_skb_size <= skb_availroom(skb))
- skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
- next_skb_size);
- else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
- return false;
- }
+ if (next_skb_size && !tcp_skb_shift(skb, next_skb, 1, next_skb_size))
+ return false;
+
tcp_highest_sack_replace(sk, next_skb, skb);
/* Update sequence range on original skb. */
@@ -3184,7 +3169,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
cur_mss, GFP_ATOMIC))
return -ENOMEM; /* We'll try again later. */
} else {
- if (skb_unclone(skb, GFP_ATOMIC))
+ if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
return -ENOMEM;
diff = tcp_skb_pcount(skb);
@@ -3757,10 +3742,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
/* limit to order-0 allocations */
space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
- syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
+ syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false);
if (!syn_data)
goto fallback;
- syn_data->ip_summed = CHECKSUM_PARTIAL;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
if (space) {
int copied = copy_from_iter(skb_put(syn_data, space), space,
@@ -3838,7 +3822,7 @@ int tcp_connect(struct sock *sk)
return 0;
}
- buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
+ buff = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
if (unlikely(!buff))
return -ENOBUFS;
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 0de693565963..fbab921670cc 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp;
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
+ TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
}
@@ -86,6 +87,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
if (!rs->prior_delivered ||
after(scb->tx.delivered, rs->prior_delivered)) {
+ rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
@@ -138,6 +140,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
}
rs->delivered = tp->delivered - rs->prior_delivered;
+ rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
+ /* delivered_ce occupies less than 32 bits in the skb control block */
+ rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
+
/* Model sending data and receiving ACKs as separate pipeline phases
* for a window. Usually the ACK phase is longer, but with ACK
* compression the send phase can be longer. To be safe we use the
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8851c9463b4b..319dd7bbfe33 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -78,7 +78,6 @@
#include <asm/ioctls.h>
#include <linux/memblock.h>
#include <linux/highmem.h>
-#include <linux/swap.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
@@ -390,7 +389,8 @@ static int compute_score(struct sock *sk, struct net *net,
dif, sdif);
if (!dev_match)
return -1;
- score += 4;
+ if (sk->sk_bound_dev_if)
+ score += 4;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
@@ -1053,7 +1053,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
__be16 dport;
u8 tos;
int err, is_udplite = IS_UDPLITE(sk);
- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sk_buff *skb;
struct ip_options_data opt_copy;
@@ -1361,7 +1361,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
}
up->len += size;
- if (!(up->corkflag || (flags&MSG_MORE)))
+ if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
ret = udp_push_pending_frames(sk);
if (!ret)
ret = size;
@@ -2662,9 +2662,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
if (val != 0) {
- up->corkflag = 1;
+ WRITE_ONCE(up->corkflag, 1);
} else {
- up->corkflag = 0;
+ WRITE_ONCE(up->corkflag, 0);
lock_sock(sk);
push_pending_frames(sk);
release_sock(sk);
@@ -2787,7 +2787,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
- val = up->corkflag;
+ val = READ_ONCE(up->corkflag);
break;
case UDP_ENCAP:
@@ -2866,6 +2866,9 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
mask &= ~(EPOLLIN | EPOLLRDNORM);
+ /* psock ingress_msg queue should not contain any bad checksum frames */
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7a1d5f473878..bbe6569c9ad3 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -114,6 +114,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = udp_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
}
static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index b97e3635acf5..8efaf8c3fe2a 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -2,11 +2,8 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
-#include <linux/udp.h>
-#include <linux/types.h>
#include <linux/kernel.h>
#include <net/dst_metadata.h>
-#include <net/net_namespace.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 0d122edc368d..b91003538d87 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -935,7 +935,7 @@ static int __init udp_tunnel_nic_init_module(void)
{
int err;
- udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0);
+ udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0);
if (!udp_tunnel_nic_workqueue)
return -ENOMEM;
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index f4555a88f86b..9d4f418f1bf8 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -8,9 +8,7 @@
#include <linux/skbuff.h>
#include <linux/module.h>
-#include <linux/mutex.h>
#include <net/xfrm.h>
-#include <net/ip.h>
#include <net/protocol.h>
static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e504204bca92..bf2e5e5fe142 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -332,10 +332,10 @@ config IPV6_IOAM6_LWTUNNEL
bool "IPv6: IOAM Pre-allocated Trace insertion support"
depends on IPV6
select LWTUNNEL
+ select DST_CACHE
help
- Support for the inline insertion of IOAM Pre-allocated
- Trace Header (only on locally generated packets), using
- the lightweight tunnels mechanism.
+ Support for the insertion of IOAM Pre-allocated Trace
+ Header using the lightweight tunnels mechanism.
If unsure, say N.
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 1bc7e143217b..3036a45e8a1e 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -5,16 +5,14 @@
obj-$(CONFIG_IPV6) += ipv6.o
-ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
+ipv6-y := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o
-ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
-
-ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
+ipv6-$(CONFIG_SYSCTL) += sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
@@ -29,8 +27,6 @@ ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
-ipv6-objs += $(ipv6-y)
-
obj-$(CONFIG_INET6_AH) += ah6.o
obj-$(CONFIG_INET6_ESP) += esp6.o
obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o
@@ -48,7 +44,8 @@ obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
obj-$(CONFIG_IPV6_FOU) += fou6.o
obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
-obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
+obj-$(CONFIG_INET) += output_core.o protocol.o \
+ ip6_offload.o tcpv6_offload.o exthdrs_offload.o
obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c6a90b7bbb70..3445f8017430 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -241,6 +241,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ioam6_enabled = 0,
.ioam6_id = IOAM6_DEFAULT_IF_ID,
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
+ .ndisc_evict_nocarrier = 1,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -300,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ioam6_enabled = 0,
.ioam6_id = IOAM6_DEFAULT_IF_ID,
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
+ .ndisc_evict_nocarrier = 1,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -2237,12 +2239,12 @@ static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev)
static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev)
{
- union fwnet_hwaddr *ha;
+ const union fwnet_hwaddr *ha;
if (dev->addr_len != FWNET_ALEN)
return -1;
- ha = (union fwnet_hwaddr *)dev->dev_addr;
+ ha = (const union fwnet_hwaddr *)dev->dev_addr;
memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id));
eui[0] ^= 2;
@@ -3110,6 +3112,9 @@ static void add_v4_addrs(struct inet6_dev *idev)
memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
if (idev->dev->flags&IFF_POINTOPOINT) {
+ if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE)
+ return;
+
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
plen = 64;
@@ -5542,6 +5547,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
+ array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
}
static inline size_t inet6_ifla6_size(void)
@@ -6984,6 +6990,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_douintvec,
},
{
+ .procname = "ndisc_evict_nocarrier",
+ .data = &ipv6_devconf.ndisc_evict_nocarrier,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)SYSCTL_ONE,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b5878bb8e419..0c4da163535a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -521,31 +521,32 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_scope_id = 0;
+ lock_sock(sk);
if (peer) {
- if (!inet->inet_dport)
- return -ENOTCONN;
- if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
- peer == 1)
+ if (!inet->inet_dport ||
+ (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+ peer == 1)) {
+ release_sock(sk);
return -ENOTCONN;
+ }
sin->sin6_port = inet->inet_dport;
sin->sin6_addr = sk->sk_v6_daddr;
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- CGROUP_INET6_GETPEERNAME,
- NULL);
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ CGROUP_INET6_GETPEERNAME);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
sin->sin6_addr = np->saddr;
else
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- CGROUP_INET6_GETSOCKNAME,
- NULL);
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ CGROUP_INET6_GETSOCKNAME);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
+ release_sock(sk);
return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3a871a09f962..38ece3b7b839 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -979,7 +979,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
if (!skb_valid_dst(skb))
ip6_route_input(skb);
- ioam6_fill_trace_data(skb, ns, trace);
+ ioam6_fill_trace_data(skb, ns, trace, true);
break;
default:
break;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index a1ac0e3d8c60..47447f0241df 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -610,7 +610,11 @@ int ila_xlat_init_net(struct net *net)
if (err)
return err;
- rhashtable_init(&ilan->xlat.rhash_table, &rht_params);
+ err = rhashtable_init(&ilan->xlat.rhash_table, &rht_params);
+ if (err) {
+ free_bucket_spinlocks(ilan->xlat.locks);
+ return err;
+ }
return 0;
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 55c290d55605..67c9114835c8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -106,7 +106,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
- score = 1;
+ score = sk->sk_bound_dev_if ? 2 : 1;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 5e8961004832..122a3d47424c 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -631,7 +631,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_namespace *ns,
struct ioam6_trace_hdr *trace,
struct ioam6_schema *sc,
- u8 sclen)
+ u8 sclen, bool is_input)
{
struct __kernel_sock_timeval ts;
u64 raw64;
@@ -645,7 +645,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
/* hop_lim and node_id */
if (trace->type.bit0) {
byte = ipv6_hdr(skb)->hop_limit;
- if (skb->dev)
+ if (is_input)
byte--;
raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
@@ -730,7 +730,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
/* hop_lim and node_id (wide) */
if (trace->type.bit8) {
byte = ipv6_hdr(skb)->hop_limit;
- if (skb->dev)
+ if (is_input)
byte--;
raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
@@ -770,6 +770,66 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
data += sizeof(__be32);
}
+ /* bit12 undefined: filled with empty value */
+ if (trace->type.bit12) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit13 undefined: filled with empty value */
+ if (trace->type.bit13) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit14 undefined: filled with empty value */
+ if (trace->type.bit14) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit15 undefined: filled with empty value */
+ if (trace->type.bit15) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit16 undefined: filled with empty value */
+ if (trace->type.bit16) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit17 undefined: filled with empty value */
+ if (trace->type.bit17) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit18 undefined: filled with empty value */
+ if (trace->type.bit18) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit19 undefined: filled with empty value */
+ if (trace->type.bit19) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit20 undefined: filled with empty value */
+ if (trace->type.bit20) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit21 undefined: filled with empty value */
+ if (trace->type.bit21) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
/* opaque state snapshot */
if (trace->type.bit22) {
if (!sc) {
@@ -786,21 +846,16 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
/* called with rcu_read_lock() */
void ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_namespace *ns,
- struct ioam6_trace_hdr *trace)
+ struct ioam6_trace_hdr *trace,
+ bool is_input)
{
struct ioam6_schema *sc;
u8 sclen = 0;
- /* Skip if Overflow flag is set OR
- * if an unknown type (bit 12-21) is set
+ /* Skip if Overflow flag is set
*/
- if (trace->overflow ||
- trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
- trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
- trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
- trace->type.bit21) {
+ if (trace->overflow)
return;
- }
/* NodeLen does not include Opaque State Snapshot length. We need to
* take it into account if the corresponding bit is set (bit 22) and
@@ -822,7 +877,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb,
return;
}
- __ioam6_fill_trace_data(skb, ns, trace, sc, sclen);
+ __ioam6_fill_trace_data(skb, ns, trace, sc, sclen, is_input);
trace->remlen -= trace->nodelen + sclen;
}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f9ee04541c17..f90a87389fcc 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/net.h>
-#include <linux/netlink.h>
#include <linux/in6.h>
#include <linux/ioam6.h>
#include <linux/ioam6_iptunnel.h>
@@ -17,18 +16,26 @@
#include <net/sock.h>
#include <net/lwtunnel.h>
#include <net/ioam6.h>
+#include <net/netlink.h>
+#include <net/ipv6.h>
+#include <net/dst_cache.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
#define IOAM6_MASK_SHORT_FIELDS 0xff100000
#define IOAM6_MASK_WIDE_FIELDS 0xe00000
struct ioam6_lwt_encap {
- struct ipv6_hopopt_hdr eh;
- u8 pad[2]; /* 2-octet padding for 4n-alignment */
- struct ioam6_hdr ioamh;
- struct ioam6_trace_hdr traceh;
+ struct ipv6_hopopt_hdr eh;
+ u8 pad[2]; /* 2-octet padding for 4n-alignment */
+ struct ioam6_hdr ioamh;
+ struct ioam6_trace_hdr traceh;
} __packed;
struct ioam6_lwt {
+ struct dst_cache cache;
+ u8 mode;
+ struct in6_addr tundst;
struct ioam6_lwt_encap tuninfo;
};
@@ -42,40 +49,29 @@ static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
return &ioam6_lwt_state(lwt)->tuninfo;
}
-static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt)
+static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
{
return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
}
static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
+ IOAM6_IPTUNNEL_MODE_MIN,
+ IOAM6_IPTUNNEL_MODE_MAX),
+ [IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
};
-static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype,
- struct ioam6_trace_hdr *trace)
-{
- struct ioam6_trace_hdr *data;
- struct nlattr *nla;
- int len;
-
- len = sizeof(*trace);
-
- nla = nla_reserve(skb, attrtype, len);
- if (!nla)
- return -EMSGSIZE;
-
- data = nla_data(nla);
- memcpy(data, trace, len);
-
- return 0;
-}
-
static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
{
u32 fields;
if (!trace->type_be32 || !trace->remlen ||
- trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4)
+ trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 ||
+ trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21)
return false;
trace->nodelen = 0;
@@ -97,9 +93,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
struct ioam6_lwt_encap *tuninfo;
struct ioam6_trace_hdr *trace;
- struct lwtunnel_state *s;
- int len_aligned;
- int len, err;
+ struct lwtunnel_state *lwt;
+ struct ioam6_lwt *ilwt;
+ int len_aligned, err;
+ u8 mode;
if (family != AF_INET6)
return -EINVAL;
@@ -109,6 +106,16 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
if (err < 0)
return err;
+ if (!tb[IOAM6_IPTUNNEL_MODE])
+ mode = IOAM6_IPTUNNEL_MODE_INLINE;
+ else
+ mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
+
+ if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
+ return -EINVAL;
+ }
+
if (!tb[IOAM6_IPTUNNEL_TRACE]) {
NL_SET_ERR_MSG(extack, "missing trace");
return -EINVAL;
@@ -121,15 +128,24 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
return -EINVAL;
}
- len = sizeof(*tuninfo) + trace->remlen * 4;
- len_aligned = ALIGN(len, 8);
-
- s = lwtunnel_state_alloc(len_aligned);
- if (!s)
+ len_aligned = ALIGN(trace->remlen * 4, 8);
+ lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned);
+ if (!lwt)
return -ENOMEM;
- tuninfo = ioam6_lwt_info(s);
- tuninfo->eh.hdrlen = (len_aligned >> 3) - 1;
+ ilwt = ioam6_lwt_state(lwt);
+ err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
+ if (err) {
+ kfree(lwt);
+ return err;
+ }
+
+ ilwt->mode = mode;
+ if (tb[IOAM6_IPTUNNEL_DST])
+ ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
+
+ tuninfo = ioam6_lwt_info(lwt);
+ tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
tuninfo->pad[0] = IPV6_TLV_PADN;
tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
@@ -138,27 +154,39 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
memcpy(&tuninfo->traceh, trace, sizeof(*trace));
- len = len_aligned - len;
- if (len == 1) {
- tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1;
- } else if (len > 0) {
+ if (len_aligned - trace->remlen * 4) {
tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
- tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2;
+ tuninfo->traceh.data[trace->remlen * 4 + 1] = 2;
}
- s->type = LWTUNNEL_ENCAP_IOAM6;
- s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+ lwt->type = LWTUNNEL_ENCAP_IOAM6;
+ lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
- *ts = s;
+ *ts = lwt;
return 0;
}
-static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
+static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
{
struct ioam6_trace_hdr *trace;
- struct ipv6hdr *oldhdr, *hdr;
struct ioam6_namespace *ns;
+
+ trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
+ + sizeof(struct ipv6_hopopt_hdr) + 2
+ + sizeof(struct ioam6_hdr));
+
+ ns = ioam6_namespace(net, trace->namespace_id);
+ if (ns)
+ ioam6_fill_trace_data(skb, ns, trace, false);
+
+ return 0;
+}
+
+static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
+ struct ioam6_lwt_encap *tuninfo)
+{
+ struct ipv6hdr *oldhdr, *hdr;
int hdrlen, err;
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
@@ -187,80 +215,200 @@ static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
hdr->nexthdr = NEXTHDR_HOP;
hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
- trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
- + sizeof(struct ipv6_hopopt_hdr) + 2
- + sizeof(struct ioam6_hdr));
+ return ioam6_do_fill(net, skb);
+}
- ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id);
- if (ns)
- ioam6_fill_trace_data(skb, ns, trace);
+static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
+ struct ioam6_lwt_encap *tuninfo,
+ struct in6_addr *tundst)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct ipv6hdr *hdr, *inner_hdr;
+ int hdrlen, len, err;
- return 0;
+ hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
+ len = sizeof(*hdr) + hdrlen;
+
+ err = skb_cow_head(skb, len + skb->mac_len);
+ if (unlikely(err))
+ return err;
+
+ inner_hdr = ipv6_hdr(skb);
+
+ skb_push(skb, len);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+ skb_set_transport_header(skb, sizeof(*hdr));
+
+ tuninfo->eh.nexthdr = NEXTHDR_IPV6;
+ memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
+
+ hdr = ipv6_hdr(skb);
+ memcpy(hdr, inner_hdr, sizeof(*hdr));
+
+ hdr->nexthdr = NEXTHDR_HOP;
+ hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
+ hdr->daddr = *tundst;
+ ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+ IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
+
+ skb_postpush_rcsum(skb, hdr, len);
+
+ return ioam6_do_fill(net, skb);
}
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate;
+ struct dst_entry *dst = skb_dst(skb);
+ struct in6_addr orig_daddr;
+ struct ioam6_lwt *ilwt;
int err = -EINVAL;
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- /* Only for packets we send and
- * that do not contain a Hop-by-Hop yet
- */
- if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
- goto out;
-
- err = ioam6_do_inline(skb, ioam6_lwt_info(lwt));
- if (unlikely(err))
+ ilwt = ioam6_lwt_state(dst->lwtstate);
+ orig_daddr = ipv6_hdr(skb)->daddr;
+
+ switch (ilwt->mode) {
+ case IOAM6_IPTUNNEL_MODE_INLINE:
+do_inline:
+ /* Direct insertion - if there is no Hop-by-Hop yet */
+ if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
+ goto out;
+
+ err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+ if (unlikely(err))
+ goto drop;
+
+ break;
+ case IOAM6_IPTUNNEL_MODE_ENCAP:
+do_encap:
+ /* Encapsulation (ip6ip6) */
+ err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
+ if (unlikely(err))
+ goto drop;
+
+ break;
+ case IOAM6_IPTUNNEL_MODE_AUTO:
+ /* Automatic (RFC8200 compliant):
+ * - local packets -> INLINE mode
+ * - in-transit packets -> ENCAP mode
+ */
+ if (!skb->dev)
+ goto do_inline;
+
+ goto do_encap;
+ default:
goto drop;
+ }
- err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev));
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
goto drop;
+ if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
+ preempt_disable();
+ dst = dst_cache_get(&ilwt->cache);
+ preempt_enable();
+
+ if (unlikely(!dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ err = dst->error;
+ dst_release(dst);
+ goto drop;
+ }
+
+ preempt_disable();
+ dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
+ preempt_enable();
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ return dst_output(net, sk, skb);
+ }
out:
- return lwt->orig_output(net, sk, skb);
-
+ return dst->lwtstate->orig_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
}
+static void ioam6_destroy_state(struct lwtunnel_state *lwt)
+{
+ dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
+}
+
static int ioam6_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
- struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+ struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
+ int err;
- if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace))
- return -EMSGSIZE;
+ err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
+ if (err)
+ goto ret;
- return 0;
+ if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
+ if (err)
+ goto ret;
+ }
+
+ err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh),
+ &ilwt->tuninfo.traceh);
+ret:
+ return err;
}
static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
- struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+ struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
+ int nlsize;
+
+ nlsize = nla_total_size(sizeof(ilwt->mode)) +
+ nla_total_size(sizeof(ilwt->tuninfo.traceh));
- return nla_total_size(sizeof(*trace));
+ if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
+ nlsize += nla_total_size(sizeof(ilwt->tundst));
+
+ return nlsize;
}
static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
- struct ioam6_trace_hdr *a_hdr = ioam6_trace(a);
- struct ioam6_trace_hdr *b_hdr = ioam6_trace(b);
-
- return (a_hdr->namespace_id != b_hdr->namespace_id);
+ struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a);
+ struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b);
+ struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
+ struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);
+
+ return (ilwt_a->mode != ilwt_b->mode ||
+ (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
+ !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
+ trace_a->namespace_id != trace_b->namespace_id);
}
static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
- .build_state = ioam6_build_state,
+ .build_state = ioam6_build_state,
+ .destroy_state = ioam6_destroy_state,
.output = ioam6_output,
- .fill_encap = ioam6_fill_encap_info,
+ .fill_encap = ioam6_fill_encap_info,
.get_encap_size = ioam6_encap_nlsize,
- .cmp_encap = ioam6_encap_cmp,
- .owner = THIS_MODULE,
+ .cmp_encap = ioam6_encap_cmp,
+ .owner = THIS_MODULE,
};
int __init ioam6_iptunnel_init(void)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1bec5b22f80d..0371d2c14145 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1378,7 +1378,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
- int sernum = fib6_new_sernum(info->nl_net);
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1478,7 +1477,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (!err) {
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
- __fib6_update_sernum_upto_root(rt, sernum);
+ __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
fib6_start_gc(info->nl_net, rt);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3ad201d372d8..d831d2439693 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1088,7 +1088,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
struct flowi6 *fl6 = &t->fl.u.ip6;
if (dev->type != ARPHRD_ETHER) {
- memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
}
@@ -1521,7 +1521,7 @@ static int ip6gre_tunnel_init(struct net_device *dev)
if (tunnel->parms.collect_md)
return 0;
- memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
+ __dev_addr_set(dev, &tunnel->parms.laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
if (ipv6_addr_any(&tunnel->parms.raddr))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 12f985f43bcc..2f044a49afa8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -464,13 +464,14 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
int ip6_forward(struct sk_buff *skb)
{
- struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
struct net *net = dev_net(dst->dev);
+ struct inet6_dev *idev;
u32 mtu;
+ idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 20a67efda47f..484aca492cc0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1449,7 +1449,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
unsigned int mtu;
int t_hlen;
- memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
/* Set up flowi template */
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 1d8e3ffa225d..527e9ead7449 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -660,7 +660,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
struct net_device *tdev = NULL;
int mtu;
- memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index e4bdb09c5586..41efca817db4 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -55,6 +55,8 @@
struct ip6_ra_chain *ip6_ra_chain;
DEFINE_RWLOCK(ip6_ra_lock);
+DEFINE_STATIC_KEY_FALSE(ip6_min_hopcount);
+
int ip6_ra_control(struct sock *sk, int sel)
{
struct ip6_ra_chain *ra, *new_ra, **rap;
@@ -950,7 +952,14 @@ done:
goto e_inval;
if (val < 0 || val > 255)
goto e_inval;
- np->min_hopcount = val;
+
+ if (val)
+ static_branch_enable(&ip6_min_hopcount);
+
+ /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
+ * while we are changing it.
+ */
+ WRITE_ONCE(np->min_hopcount, val);
retv = 0;
break;
case IPV6_DONTFRAG:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 4b098521a44c..f03b597e4121 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -142,7 +142,7 @@ struct neigh_table nd_tbl = {
};
EXPORT_SYMBOL_GPL(nd_tbl);
-void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
int data_len, int pad)
{
int space = __ndisc_opt_addr_space(data_len, pad);
@@ -165,7 +165,7 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
- void *data, u8 icmp6_type)
+ const void *data, u8 icmp6_type)
{
__ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
ndisc_addr_option_pad(skb->dev->type));
@@ -1794,6 +1794,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
struct netdev_notifier_change_info *change_info;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
+ bool evict_nocarrier;
switch (event) {
case NETDEV_CHANGEADDR:
@@ -1810,10 +1811,19 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
in6_dev_put(idev);
break;
case NETDEV_CHANGE:
+ idev = in6_dev_get(dev);
+ if (!idev)
+ evict_nocarrier = true;
+ else {
+ evict_nocarrier = idev->cnf.ndisc_evict_nocarrier &&
+ net->ipv6.devconf_all->ndisc_evict_nocarrier;
+ in6_dev_put(idev);
+ }
+
change_info = ptr;
if (change_info->flags_changed & IFF_NOARP)
neigh_changeaddr(&nd_tbl, dev);
- if (!netif_carrier_ok(dev))
+ if (evict_nocarrier && !netif_carrier_ok(dev))
neigh_carrier_down(&nd_tbl, dev);
break;
case NETDEV_DOWN:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index de2cf3943b91..2d816277f2c5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -247,10 +247,10 @@ ip6t_next_entry(const struct ip6t_entry *entry)
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ip6t_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table)
+ip6t_do_table(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
+ const struct xt_table *table = priv;
unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
/* Initializing verdict to NF_DROP keeps gcc happy. */
@@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb,
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
+ acpar.fragoff = 0;
acpar.hotdrop = false;
acpar.state = state;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 733c83d38b30..4ad8b2032f1f 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -25,12 +25,7 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
static inline bool
segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
{
- bool r;
- pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n",
- invert ? '!' : ' ', min, id, max);
- r = (id >= min && id <= max) ^ invert;
- pr_debug(" result %s\n", r ? "PASS" : "FAILED");
- return r;
+ return (id >= min && id <= max) ^ invert;
}
static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
@@ -65,30 +60,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
- pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
- pr_debug("TYPE %04X ", rh->type);
- pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
-
- pr_debug("IPv6 RT segsleft %02X ",
- segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
- rh->segments_left,
- !!(rtinfo->invflags & IP6T_RT_INV_SGS)));
- pr_debug("type %02X %02X %02X ",
- rtinfo->rt_type, rh->type,
- (!(rtinfo->flags & IP6T_RT_TYP) ||
- ((rtinfo->rt_type == rh->type) ^
- !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
- pr_debug("len %02X %04X %02X ",
- rtinfo->hdrlen, hdrlen,
- !(rtinfo->flags & IP6T_RT_LEN) ||
- ((rtinfo->hdrlen == hdrlen) ^
- !!(rtinfo->invflags & IP6T_RT_INV_LEN)));
- pr_debug("res %02X %02X %02X ",
- rtinfo->flags & IP6T_RT_RES,
- ((const struct rt0_hdr *)rh)->reserved,
- !((rtinfo->flags & IP6T_RT_RES) &&
- (((const struct rt0_hdr *)rh)->reserved)));
-
ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
rh->segments_left,
!!(rtinfo->invflags & IP6T_RT_INV_SGS))) &&
@@ -107,22 +78,22 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
reserved),
sizeof(_reserved),
&_reserved);
+ if (!rp) {
+ par->hotdrop = true;
+ return false;
+ }
ret = (*rp == 0);
}
- pr_debug("#%d ", rtinfo->addrnr);
if (!(rtinfo->flags & IP6T_RT_FST)) {
return ret;
} else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
- pr_debug("Not strict ");
if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
- pr_debug("There isn't enough space\n");
return false;
} else {
unsigned int i = 0;
- pr_debug("#%d ", rtinfo->addrnr);
for (temp = 0;
temp < (unsigned int)((hdrlen - 8) / 16);
temp++) {
@@ -138,26 +109,20 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
- if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
- pr_debug("i=%d temp=%d;\n", i, temp);
+ if (ipv6_addr_equal(ap, &rtinfo->addrs[i]))
i++;
- }
if (i == rtinfo->addrnr)
break;
}
- pr_debug("i=%d #%d\n", i, rtinfo->addrnr);
if (i == rtinfo->addrnr)
return ret;
else
return false;
}
} else {
- pr_debug("Strict ");
if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
- pr_debug("There isn't enough space\n");
return false;
} else {
- pr_debug("#%d ", rtinfo->addrnr);
for (temp = 0; temp < rtinfo->addrnr; temp++) {
ap = skb_header_pointer(skb,
ptr
@@ -173,7 +138,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
break;
}
- pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr);
if (temp == rtinfo->addrnr &&
temp == (unsigned int)((hdrlen - 8) / 16))
return ret;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 727ee8097012..df785ebda0ca 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -27,14 +27,6 @@ static const struct xt_table packet_filter = {
.priority = NF_IP6_PRI_FILTER,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6table_filter_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip6t_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
@@ -90,7 +82,7 @@ static int __init ip6table_filter_init(void)
if (ret < 0)
return ret;
- filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
+ filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table);
if (IS_ERR(filter_ops)) {
xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 9b518ce37d6a..a88b2ce4a3cb 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -29,7 +29,7 @@ static const struct xt_table packet_mangler = {
};
static unsigned int
-ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
+ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
unsigned int ret;
struct in6_addr saddr, daddr;
@@ -46,7 +46,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pr
/* flowlabel and prio (includes version, which shouldn't change either */
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
- ret = ip6t_do_table(skb, state, priv);
+ ret = ip6t_do_table(priv, skb, state);
if (ret != NF_DROP && ret != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -68,8 +68,8 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
if (state->hook == NF_INET_LOCAL_OUT)
- return ip6t_mangle_out(skb, state, priv);
- return ip6t_do_table(skb, state, priv);
+ return ip6t_mangle_out(priv, skb, state);
+ return ip6t_do_table(priv, skb, state);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 921c1723a01e..bf3cb3a13600 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -31,34 +31,27 @@ static const struct xt_table nf_nat_ipv6_table = {
.af = NFPROTO_IPV6,
};
-static unsigned int ip6table_nat_do_chain(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip6t_do_table(skb, state, priv);
-}
-
static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
- .hook = ip6table_nat_do_chain,
+ .hook = ip6t_do_table,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
},
{
- .hook = ip6table_nat_do_chain,
+ .hook = ip6t_do_table,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
},
{
- .hook = ip6table_nat_do_chain,
+ .hook = ip6t_do_table,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
},
{
- .hook = ip6table_nat_do_chain,
+ .hook = ip6t_do_table,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 4f2a04af71d3..08861d5d1f4d 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -31,14 +31,6 @@ static const struct xt_table packet_raw_before_defrag = {
.priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6table_raw_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip6t_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *rawtable_ops __read_mostly;
static int ip6table_raw_table_init(struct net *net)
@@ -88,7 +80,7 @@ static int __init ip6table_raw_init(void)
return ret;
/* Register hooks */
- rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table);
if (IS_ERR(rawtable_ops)) {
xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 931674034d8b..4df14a9bae78 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -32,13 +32,6 @@ static const struct xt_table security_table = {
.priority = NF_IP6_PRI_SECURITY,
};
-static unsigned int
-ip6table_security_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip6t_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *sectbl_ops __read_mostly;
static int ip6table_security_table_init(struct net *net)
@@ -77,7 +70,7 @@ static int __init ip6table_security_init(void)
if (ret < 0)
return ret;
- sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
+ sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table);
if (IS_ERR(sectbl_ops)) {
xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index a0108415275f..5c47be29b9ee 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -33,7 +33,7 @@
static const char nf_frags_cache_name[] = "nf-frags";
-unsigned int nf_frag_pernet_id __read_mostly;
+static unsigned int nf_frag_pernet_id __read_mostly;
static struct inet_frags nf_frags;
static struct nft_ct_frag6_pernet *nf_frag_pernet(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index e8a59d8bf2ad..cb4eb1d2c620 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -25,8 +25,6 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
-extern unsigned int nf_frag_pernet_id;
-
static DEFINE_MUTEX(defrag6_mutex);
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -91,12 +89,10 @@ static const struct nf_hook_ops ipv6_defrag_ops[] = {
static void __net_exit defrag6_net_exit(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
-
- if (nf_frag->users) {
+ if (net->nf.defrag_ipv6_users) {
nf_unregister_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
- nf_frag->users = 0;
+ net->nf.defrag_ipv6_users = 0;
}
}
@@ -134,24 +130,23 @@ static void __exit nf_defrag_fini(void)
int nf_defrag_ipv6_enable(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
int err = 0;
mutex_lock(&defrag6_mutex);
- if (nf_frag->users == UINT_MAX) {
+ if (net->nf.defrag_ipv6_users == UINT_MAX) {
err = -EOVERFLOW;
goto out_unlock;
}
- if (nf_frag->users) {
- nf_frag->users++;
+ if (net->nf.defrag_ipv6_users) {
+ net->nf.defrag_ipv6_users++;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
if (err == 0)
- nf_frag->users = 1;
+ net->nf.defrag_ipv6_users = 1;
out_unlock:
mutex_unlock(&defrag6_mutex);
@@ -161,12 +156,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
void nf_defrag_ipv6_disable(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
-
mutex_lock(&defrag6_mutex);
- if (nf_frag->users) {
- nf_frag->users--;
- if (nf_frag->users == 0)
+ if (net->nf.defrag_ipv6_users) {
+ net->nf.defrag_ipv6_users--;
+ if (net->nf.defrag_ipv6_users == 0)
nf_unregister_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dbc224023977..3ae25b8ffbd6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5681,14 +5681,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
- rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
+ rt->fib6_nh->fib_nh_weight, AF_INET6,
+ 0) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
sibling->fib6_nh->fib_nh_weight,
- AF_INET6) < 0)
+ AF_INET6, 0) < 0)
goto nla_put_failure;
}
@@ -6305,11 +6306,11 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
static struct ctl_table ipv6_route_table_template[] = {
{
- .procname = "flush",
- .data = &init_net.ipv6.sysctl.flush_delay,
+ .procname = "max_size",
+ .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
.maxlen = sizeof(int),
- .mode = 0200,
- .proc_handler = ipv6_sysctl_rtcache_flush
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
.procname = "gc_thresh",
@@ -6319,11 +6320,11 @@ static struct ctl_table ipv6_route_table_template[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "max_size",
- .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
+ .procname = "flush",
+ .data = &init_net.ipv6.sysctl.flush_delay,
.maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .mode = 0200,
+ .proc_handler = ipv6_sysctl_rtcache_flush
},
{
.procname = "gc_min_interval",
@@ -6395,10 +6396,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
GFP_KERNEL);
if (table) {
- table[0].data = &net->ipv6.sysctl.flush_delay;
- table[0].extra1 = net;
+ table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
- table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+ table[2].data = &net->ipv6.sysctl.flush_delay;
+ table[2].extra1 = net;
table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
@@ -6410,7 +6411,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
+ table[1].procname = NULL;
}
return table;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index e412817fba2f..a8b5784afb1a 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -374,7 +374,11 @@ static int __net_init seg6_net_init(struct net *net)
net->ipv6.seg6_data = sdata;
#ifdef CONFIG_IPV6_SEG6_HMAC
- seg6_hmac_net_init(net);
+ if (seg6_hmac_net_init(net)) {
+ kfree(rcu_dereference_raw(sdata->tun_src));
+ kfree(sdata);
+ return -ENOMEM;
+ }
#endif
return 0;
@@ -388,7 +392,7 @@ static void __net_exit seg6_net_exit(struct net *net)
seg6_hmac_net_exit(net);
#endif
- kfree(sdata->tun_src);
+ kfree(rcu_dereference_raw(sdata->tun_src));
kfree(sdata);
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 687d95dce085..29bc4e7c3046 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -405,9 +405,7 @@ int __net_init seg6_hmac_net_init(struct net *net)
{
struct seg6_pernet_data *sdata = seg6_pernet(net);
- rhashtable_init(&sdata->hmac_infos, &rht_params);
-
- return 0;
+ return rhashtable_init(&sdata->hmac_infos, &rht_params);
}
EXPORT_SYMBOL(seg6_hmac_net_init);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ef0c7a7c18e2..1b57ee36d668 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -204,7 +204,7 @@ static int ipip6_tunnel_create(struct net_device *dev)
struct sit_net *sitn = net_generic(net, sit_net_id);
int err;
- memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
+ __dev_addr_set(dev, &t->parms.iph.saddr, 4);
memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
if ((__force u16)t->parms.i_flags & SIT_ISATAP)
@@ -1149,7 +1149,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
synchronize_net();
t->parms.iph.saddr = p->iph.saddr;
t->parms.iph.daddr = p->iph.daddr;
- memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
+ __dev_addr_set(t->dev, &p->iph.saddr, 4);
memcpy(t->dev->broadcast, &p->iph.daddr, 4);
ipip6_tunnel_link(sitn, t);
t->parms.iph.ttl = p->iph.ttl;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0ce52d46e4f8..551fce49841d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -108,8 +108,8 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
const struct rt6_info *rt = (const struct rt6_info *)dst;
sk->sk_rx_dst = dst;
- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+ sk->sk_rx_dst_ifindex = skb->skb_iif;
+ sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
}
}
@@ -414,9 +414,12 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk->sk_state == TCP_CLOSE)
goto out;
- if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
- __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
- goto out;
+ if (static_branch_unlikely(&ip6_min_hopcount)) {
+ /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
+ if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ goto out;
+ }
}
tp = tcp_sk(sk);
@@ -569,7 +572,7 @@ done:
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
kfree(inet_rsk(req)->ipv6_opt);
- kfree_skb(inet_rsk(req)->pktopts);
+ consume_skb(inet_rsk(req)->pktopts);
}
#ifdef CONFIG_TCP_MD5SIG
@@ -599,6 +602,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
int l3index = 0;
u8 prefixlen;
+ u8 flags;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -609,6 +613,8 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (sin6->sin6_family != AF_INET6)
return -EINVAL;
+ flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
@@ -619,7 +625,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
}
- if (optname == TCP_MD5SIG_EXT &&
+ if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
struct net_device *dev;
@@ -640,9 +646,9 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
AF_INET, prefixlen,
- l3index);
+ l3index, flags);
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, l3index);
+ AF_INET6, prefixlen, l3index, flags);
}
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
@@ -650,12 +656,12 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, l3index,
+ AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen,
GFP_KERNEL);
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, l3index,
+ AF_INET6, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
@@ -963,7 +969,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowlabel = label;
buff->ip_summed = CHECKSUM_PARTIAL;
- buff->csum = 0;
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
@@ -1258,7 +1263,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
- newinet = inet_sk(newsk);
newnp = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
@@ -1404,7 +1408,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
- AF_INET6, 128, l3index, key->key, key->keylen,
+ AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
sk_gfp_mask(sk, GFP_ATOMIC));
}
#endif
@@ -1506,9 +1510,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) {
- if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+ if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
- dst, np->rx_dst_cookie) == NULL) {
+ dst, sk->sk_rx_dst_cookie) == NULL) {
dst_release(dst);
sk->sk_rx_dst = NULL;
}
@@ -1588,7 +1592,7 @@ ipv6_pktoptions:
}
}
- kfree_skb(opt_skb);
+ consume_skb(opt_skb);
return 0;
}
@@ -1618,7 +1622,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb_to_free;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
@@ -1724,9 +1727,13 @@ process:
return 0;
}
}
- if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
- __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
- goto discard_and_relse;
+
+ if (static_branch_unlikely(&ip6_min_hopcount)) {
+ /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
+ if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ goto discard_and_relse;
+ }
}
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -1754,17 +1761,12 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- skb_to_free = sk->sk_rx_skb_cache;
- sk->sk_rx_skb_cache = NULL;
ret = tcp_v6_do_rcv(sk, skb);
} else {
if (tcp_add_backlog(sk, skb))
goto discard_and_relse;
- skb_to_free = NULL;
}
bh_unlock_sock(sk);
- if (skb_to_free)
- __kfree_skb(skb_to_free);
put_and_return:
if (refcounted)
sock_put(sk);
@@ -1875,9 +1877,9 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
if (dst)
- dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
+ dst = dst_check(dst, sk->sk_rx_dst_cookie);
if (dst &&
- inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+ sk->sk_rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ea53847b5b7e..e43b31d25fb6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -133,7 +133,8 @@ static int compute_score(struct sock *sk, struct net *net,
dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif);
if (!dev_match)
return -1;
- score++;
+ if (sk->sk_bound_dev_if)
+ score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
@@ -699,9 +700,9 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
ret = encap_rcv(sk, skb);
if (ret <= 0) {
- __UDP_INC_STATS(sock_net(sk),
- UDP_MIB_INDATAGRAMS,
- is_udplite);
+ __UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_INDATAGRAMS,
+ is_udplite);
return -ret;
}
}
@@ -883,7 +884,7 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
if (udp_sk_rx_dst_set(sk, dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+ sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
}
}
@@ -1072,7 +1073,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
dst = READ_ONCE(sk->sk_rx_dst);
if (dst)
- dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ dst = dst_check(dst, sk->sk_rx_dst_cookie);
if (dst) {
/* set noref for now.
* any place which wants to hold dst has to call
@@ -1303,7 +1304,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int addr_len = msg->msg_namelen;
bool connected = false;
int ulen = len;
- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int err;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
@@ -1434,7 +1435,6 @@ do_udp_sendmsg:
if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
- fl6.flowi6_mark = ipc6.sockc.mark;
fl6.flowi6_uid = sk->sk_uid;
if (msg->msg_controllen) {
@@ -1470,6 +1470,7 @@ do_udp_sendmsg:
ipc6.opt = opt;
fl6.flowi6_proto = sk->sk_protocol;
+ fl6.flowi6_mark = ipc6.sockc.mark;
fl6.daddr = *daddr;
if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
fl6.saddr = np->saddr;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 53486b162f01..93271a2632b8 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -869,8 +869,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
}
if (tunnel->version == L2TP_HDR_VER_3 &&
- l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
+ l2tp_session_dec_refcount(session);
goto invalid;
+ }
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 647c0554d04c..40ca3c1e42a2 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -781,7 +781,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb)
if (nskb) {
struct llc_sap *sap = llc->sap;
- u8 *dmac = llc->daddr.mac;
+ const u8 *dmac = llc->daddr.mac;
if (llc->dev->flags & IFF_LOOPBACK)
dmac = llc->dev->dev_addr;
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index ad6547736c21..dde9bf08a593 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -80,7 +80,7 @@ out_free:
* establishment will inform to upper layer via calling it's confirm
* function and passing proper information.
*/
-int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap)
+int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap)
{
int rc = -EISCONN;
struct llc_addr laddr, daddr;
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index b9ad087bcbd7..5a6466fc626a 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -56,7 +56,7 @@ int llc_mac_hdr_init(struct sk_buff *skb,
* package primitive as an event and send to SAP event handler
*/
int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb,
- unsigned char *dmac, unsigned char dsap)
+ const unsigned char *dmac, unsigned char dsap)
{
int rc;
llc_pdu_header_init(skb, LLC_PDU_TYPE_U, sap->laddr.lsap,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index a4eccb98220a..0ff490a73fae 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -26,7 +26,7 @@
#include <net/llc_c_st.h>
#include <net/llc_conn.h>
-static void llc_ui_format_mac(struct seq_file *seq, u8 *addr)
+static void llc_ui_format_mac(struct seq_file *seq, const u8 *addr)
{
seq_printf(seq, "%pM", addr);
}
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index cce28e3b2232..470ff0ce3dc7 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -477,7 +477,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
size_t len)
{
u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
- struct ieee802_11_elems elems = { };
+ struct ieee802_11_elems *elems = NULL;
u8 dialog_token;
int ies_len;
@@ -495,16 +495,18 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
ies_len = len - offsetof(struct ieee80211_mgmt,
u.action.u.addba_req.variable);
if (ies_len) {
- ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
- ies_len, true, &elems, mgmt->bssid, NULL);
- if (elems.parse_error)
- return;
+ elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
+ ies_len, true, mgmt->bssid, NULL);
+ if (!elems || elems->parse_error)
+ goto free;
}
__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
start_seq_num, ba_policy, tid,
buf_size, true, false,
- elems.addba_ext_ie);
+ elems ? elems->addba_ext_ie : NULL);
+free:
+ kfree(elems);
}
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d69b31c20fe2..e2b791c37591 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -111,6 +111,36 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_mbssid_config params)
+{
+ struct ieee80211_sub_if_data *tx_sdata;
+
+ sdata->vif.mbssid_tx_vif = NULL;
+ sdata->vif.bss_conf.bssid_index = 0;
+ sdata->vif.bss_conf.nontransmitted = false;
+ sdata->vif.bss_conf.ema_ap = false;
+
+ if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev)
+ return -EINVAL;
+
+ tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev);
+ if (!tx_sdata)
+ return -EINVAL;
+
+ if (tx_sdata == sdata) {
+ sdata->vif.mbssid_tx_vif = &sdata->vif;
+ } else {
+ sdata->vif.mbssid_tx_vif = &tx_sdata->vif;
+ sdata->vif.bss_conf.nontransmitted = true;
+ sdata->vif.bss_conf.bssid_index = params.index;
+ }
+ if (params.ema)
+ sdata->vif.bss_conf.ema_ap = true;
+
+ return 0;
+}
+
static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
const char *name,
unsigned char name_assign_type,
@@ -1105,6 +1135,14 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
changed |= BSS_CHANGED_HE_BSS_COLOR;
}
+ if (sdata->vif.type == NL80211_IFTYPE_AP &&
+ params->mbssid_config.tx_wdev) {
+ err = ieee80211_set_ap_mbssid_options(sdata,
+ params->mbssid_config);
+ if (err)
+ return err;
+ }
+
mutex_lock(&local->mtx);
err = ieee80211_vif_use_channel(sdata, &params->chandef,
IEEE80211_CHANCTX_SHARED);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 8be28cfd6f64..481f01b0f65c 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -5,7 +5,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2020 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -153,20 +153,20 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
rcu_read_lock();
p += scnprintf(p,
- bufsz+buf-p,
+ bufsz + buf - p,
"target %uus interval %uus ecn %s\n",
codel_time_to_us(sta->cparams.target),
codel_time_to_us(sta->cparams.interval),
sta->cparams.ecn ? "yes" : "no");
p += scnprintf(p,
- bufsz+buf-p,
+ bufsz + buf - p,
"tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
if (!sta->sta.txq[i])
continue;
txqi = to_txq_info(sta->sta.txq[i]);
- p += scnprintf(p, bufsz+buf-p,
+ p += scnprintf(p, bufsz + buf - p,
"%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
@@ -314,17 +314,24 @@ STA_OPS_RW(aql);
static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
- char buf[71 + IEEE80211_NUM_TIDS * 40], *p = buf;
+ char *buf, *p;
+ ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40;
int i;
struct sta_info *sta = file->private_data;
struct tid_ampdu_rx *tid_rx;
struct tid_ampdu_tx *tid_tx;
+ ssize_t ret;
+
+ buf = kzalloc(bufsz, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
rcu_read_lock();
- p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n",
+ p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n",
sta->ampdu_mlme.dialog_token_allocator + 1);
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
@@ -334,25 +341,27 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid);
- p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i);
- p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
+ p += scnprintf(p, bufsz + buf - p, "%02d", i);
+ p += scnprintf(p, bufsz + buf - p, "\t\t%x",
tid_rx_valid);
- p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
+ p += scnprintf(p, bufsz + buf - p, "\t%#.2x",
tid_rx_valid ?
sta->ampdu_mlme.tid_rx_token[i] : 0);
- p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
+ p += scnprintf(p, bufsz + buf - p, "\t%#.3x",
tid_rx ? tid_rx->ssn : 0);
- p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_tx);
- p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
+ p += scnprintf(p, bufsz + buf - p, "\t\t%x", !!tid_tx);
+ p += scnprintf(p, bufsz + buf - p, "\t%#.2x",
tid_tx ? tid_tx->dialog_token : 0);
- p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d",
+ p += scnprintf(p, bufsz + buf - p, "\t%03d",
tid_tx ? skb_queue_len(&tid_tx->pending) : 0);
- p += scnprintf(p, sizeof(buf) + buf - p, "\n");
+ p += scnprintf(p, bufsz + buf - p, "\n");
}
rcu_read_unlock();
- return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ kfree(buf);
+ return ret;
}
static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf,
@@ -434,15 +443,22 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
if (_cond) \
p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \
} while (0)
- char buf[512], *p = buf;
+ char *buf, *p;
int i;
+ ssize_t bufsz = 512;
struct sta_info *sta = file->private_data;
struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap;
+ ssize_t ret;
- p += scnprintf(p, sizeof(buf) + buf - p, "ht %ssupported\n",
+ buf = kzalloc(bufsz, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ p += scnprintf(p, bufsz + buf - p, "ht %ssupported\n",
htc->ht_supported ? "" : "not ");
if (htc->ht_supported) {
- p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.4x\n", htc->cap);
+ p += scnprintf(p, bufsz + buf - p, "cap: %#.4x\n", htc->cap);
PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDPC");
PRINT_HT_CAP((htc->cap & BIT(1)), "HT20/HT40");
@@ -484,81 +500,90 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
PRINT_HT_CAP((htc->cap & BIT(15)), "L-SIG TXOP protection");
- p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n",
+ p += scnprintf(p, bufsz + buf - p, "ampdu factor/density: %d/%d\n",
htc->ampdu_factor, htc->ampdu_density);
- p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:");
+ p += scnprintf(p, bufsz + buf - p, "MCS mask:");
for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
- p += scnprintf(p, sizeof(buf)+buf-p, " %.2x",
+ p += scnprintf(p, bufsz + buf - p, " %.2x",
htc->mcs.rx_mask[i]);
- p += scnprintf(p, sizeof(buf)+buf-p, "\n");
+ p += scnprintf(p, bufsz + buf - p, "\n");
/* If not set this is meaningless */
if (le16_to_cpu(htc->mcs.rx_highest)) {
- p += scnprintf(p, sizeof(buf)+buf-p,
+ p += scnprintf(p, bufsz + buf - p,
"MCS rx highest: %d Mbps\n",
le16_to_cpu(htc->mcs.rx_highest));
}
- p += scnprintf(p, sizeof(buf)+buf-p, "MCS tx params: %x\n",
+ p += scnprintf(p, bufsz + buf - p, "MCS tx params: %x\n",
htc->mcs.tx_params);
}
- return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ kfree(buf);
+ return ret;
}
STA_OPS(ht_capa);
static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
- char buf[512], *p = buf;
+ char *buf, *p;
struct sta_info *sta = file->private_data;
struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap;
+ ssize_t ret;
+ ssize_t bufsz = 512;
- p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n",
+ buf = kzalloc(bufsz, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ p += scnprintf(p, bufsz + buf - p, "VHT %ssupported\n",
vhtc->vht_supported ? "" : "not ");
if (vhtc->vht_supported) {
- p += scnprintf(p, sizeof(buf) + buf - p, "cap: %#.8x\n",
+ p += scnprintf(p, bufsz + buf - p, "cap: %#.8x\n",
vhtc->cap);
#define PFLAG(a, b) \
do { \
if (vhtc->cap & IEEE80211_VHT_CAP_ ## a) \
- p += scnprintf(p, sizeof(buf) + buf - p, \
+ p += scnprintf(p, bufsz + buf - p, \
"\t\t%s\n", b); \
} while (0)
switch (vhtc->cap & 0x3) {
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tMAX-MPDU-3895\n");
break;
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tMAX-MPDU-7991\n");
break;
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tMAX-MPDU-11454\n");
break;
default:
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tMAX-MPDU-UNKNOWN\n");
}
switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
case 0:
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\t80Mhz\n");
break;
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\t160Mhz\n");
break;
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\t80+80Mhz\n");
break;
default:
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tUNKNOWN-MHZ: 0x%x\n",
(vhtc->cap >> 2) & 0x3);
}
@@ -566,15 +591,15 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
PFLAG(SHORT_GI_80, "SHORT-GI-80");
PFLAG(SHORT_GI_160, "SHORT-GI-160");
PFLAG(TXSTBC, "TXSTBC");
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tRXSTBC_%d\n", (vhtc->cap >> 8) & 0x7);
PFLAG(SU_BEAMFORMER_CAPABLE, "SU-BEAMFORMER-CAPABLE");
PFLAG(SU_BEAMFORMEE_CAPABLE, "SU-BEAMFORMEE-CAPABLE");
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tBEAMFORMEE-STS: 0x%x\n",
(vhtc->cap & IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK) >>
IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT);
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tSOUNDING-DIMENSIONS: 0x%x\n",
(vhtc->cap & IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK)
>> IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT);
@@ -582,34 +607,36 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
PFLAG(MU_BEAMFORMEE_CAPABLE, "MU-BEAMFORMEE-CAPABLE");
PFLAG(VHT_TXOP_PS, "TXOP-PS");
PFLAG(HTC_VHT, "HTC-VHT");
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tMPDU-LENGTH-EXPONENT: 0x%x\n",
(vhtc->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK) >>
IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT);
PFLAG(VHT_LINK_ADAPTATION_VHT_UNSOL_MFB,
"LINK-ADAPTATION-VHT-UNSOL-MFB");
- p += scnprintf(p, sizeof(buf) + buf - p,
+ p += scnprintf(p, bufsz + buf - p,
"\t\tLINK-ADAPTATION-VHT-MRQ-MFB: 0x%x\n",
(vhtc->cap & IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB) >> 26);
PFLAG(RX_ANTENNA_PATTERN, "RX-ANTENNA-PATTERN");
PFLAG(TX_ANTENNA_PATTERN, "TX-ANTENNA-PATTERN");
- p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n",
+ p += scnprintf(p, bufsz + buf - p, "RX MCS: %.4x\n",
le16_to_cpu(vhtc->vht_mcs.rx_mcs_map));
if (vhtc->vht_mcs.rx_highest)
- p += scnprintf(p, sizeof(buf)+buf-p,
+ p += scnprintf(p, bufsz + buf - p,
"MCS RX highest: %d Mbps\n",
le16_to_cpu(vhtc->vht_mcs.rx_highest));
- p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n",
+ p += scnprintf(p, bufsz + buf - p, "TX MCS: %.4x\n",
le16_to_cpu(vhtc->vht_mcs.tx_mcs_map));
if (vhtc->vht_mcs.tx_highest)
- p += scnprintf(p, sizeof(buf)+buf-p,
+ p += scnprintf(p, bufsz + buf - p,
"MCS TX highest: %d Mbps\n",
le16_to_cpu(vhtc->vht_mcs.tx_highest));
#undef PFLAG
}
- return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ kfree(buf);
+ return ret;
}
STA_OPS(vht_capa);
diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
index a13ae148937e..e1d4cfd99128 100644
--- a/net/mac80211/fils_aead.c
+++ b/net/mac80211/fils_aead.c
@@ -219,7 +219,8 @@ int fils_encrypt_assoc_req(struct sk_buff *skb,
{
struct ieee80211_mgmt *mgmt = (void *)skb->data;
u8 *capab, *ies, *encr;
- const u8 *addr[5 + 1], *session;
+ const u8 *addr[5 + 1];
+ const struct element *session;
size_t len[5 + 1];
size_t crypt_len;
@@ -231,12 +232,12 @@ int fils_encrypt_assoc_req(struct sk_buff *skb,
ies = mgmt->u.assoc_req.variable;
}
- session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION,
- ies, skb->data + skb->len - ies);
- if (!session || session[1] != 1 + 8)
+ session = cfg80211_find_ext_elem(WLAN_EID_EXT_FILS_SESSION,
+ ies, skb->data + skb->len - ies);
+ if (!session || session->datalen != 1 + 8)
return -EINVAL;
/* encrypt after FILS Session element */
- encr = (u8 *)session + 2 + 1 + 8;
+ encr = (u8 *)session->data + 1 + 8;
/* AES-SIV AAD vectors */
@@ -270,7 +271,8 @@ int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_mgmt *mgmt = (void *)frame;
u8 *capab, *ies, *encr;
- const u8 *addr[5 + 1], *session;
+ const u8 *addr[5 + 1];
+ const struct element *session;
size_t len[5 + 1];
int res;
size_t crypt_len;
@@ -280,16 +282,16 @@ int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,
capab = (u8 *)&mgmt->u.assoc_resp.capab_info;
ies = mgmt->u.assoc_resp.variable;
- session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION,
- ies, frame + *frame_len - ies);
- if (!session || session[1] != 1 + 8) {
+ session = cfg80211_find_ext_elem(WLAN_EID_EXT_FILS_SESSION,
+ ies, frame + *frame_len - ies);
+ if (!session || session->datalen != 1 + 8) {
mlme_dbg(sdata,
"No (valid) FILS Session element in (Re)Association Response frame from %pM",
mgmt->sa);
return -EINVAL;
}
/* decrypt after FILS Session element */
- encr = (u8 *)session + 2 + 1 + 8;
+ encr = (u8 *)session->data + 1 + 8;
/* AES-SIV AAD vectors */
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 5d6ca4c3e698..0416c4d22292 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -9,7 +9,7 @@
* Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright(c) 2018-2020 Intel Corporation
+ * Copyright(c) 2018-2021 Intel Corporation
*/
#include <linux/delay.h>
@@ -1589,7 +1589,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata,
struct ieee80211_rx_status *rx_status)
{
size_t baselen;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) !=
offsetof(typeof(mgmt->u.beacon), variable));
@@ -1602,10 +1602,14 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
- ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
- false, &elems, mgmt->bssid, NULL);
+ elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable,
+ len - baselen, false,
+ mgmt->bssid, NULL);
- ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+ if (elems) {
+ ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, elems);
+ kfree(elems);
+ }
}
void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
@@ -1614,7 +1618,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct ieee80211_rx_status *rx_status;
struct ieee80211_mgmt *mgmt;
u16 fc;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
int ies_len;
rx_status = IEEE80211_SKB_RXCB(skb);
@@ -1651,15 +1655,16 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
if (ies_len < 0)
break;
- ieee802_11_parse_elems(
+ elems = ieee802_11_parse_elems(
mgmt->u.action.u.chan_switch.variable,
- ies_len, true, &elems, mgmt->bssid, NULL);
-
- if (elems.parse_error)
- break;
-
- ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, skb->len,
- rx_status, &elems);
+ ies_len, true, mgmt->bssid, NULL);
+
+ if (elems && !elems->parse_error)
+ ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt,
+ skb->len,
+ rx_status,
+ elems);
+ kfree(elems);
break;
}
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 159af6c3ffb0..5666bbb8860b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -631,10 +631,9 @@ struct ieee80211_if_ocb {
*/
struct ieee802_11_elems;
struct ieee80211_mesh_sync_ops {
- void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata,
- u16 stype,
- struct ieee80211_mgmt *mgmt,
- struct ieee802_11_elems *elems,
+ void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype,
+ struct ieee80211_mgmt *mgmt, unsigned int len,
+ const struct ieee80211_meshconf_ie *mesh_cfg,
struct ieee80211_rx_status *rx_status);
/* should be called with beacon_data under RCU read lock */
@@ -1242,6 +1241,9 @@ struct ieee80211_local {
*/
bool suspended;
+ /* suspending is true during the whole suspend process */
+ bool suspending;
+
/*
* Resuming is true while suspended, but when we're reprogramming the
* hardware -- at that time it's allowed to use ieee80211_queue_work()
@@ -1508,6 +1510,7 @@ struct ieee80211_csa_ie {
struct ieee802_11_elems {
const u8 *ie_start;
size_t total_len;
+ u32 crc;
/* pointers to IEs */
const struct ieee80211_tdls_lnkie *lnk_id;
@@ -1517,7 +1520,6 @@ struct ieee802_11_elems {
const u8 *supp_rates;
const u8 *ds_params;
const struct ieee80211_tim_ie *tim;
- const u8 *challenge;
const u8 *rsn;
const u8 *rsnx;
const u8 *erp_info;
@@ -1571,7 +1573,6 @@ struct ieee802_11_elems {
u8 ssid_len;
u8 supp_rates_len;
u8 tim_len;
- u8 challenge_len;
u8 rsn_len;
u8 rsnx_len;
u8 ext_supp_rates_len;
@@ -2194,18 +2195,18 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
ieee80211_tx_skb_tid(sdata, skb, 7);
}
-u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
- struct ieee802_11_elems *elems,
- u64 filter, u32 crc, u8 *transmitter_bssid,
- u8 *bss_bssid);
-static inline void ieee802_11_parse_elems(const u8 *start, size_t len,
- bool action,
- struct ieee802_11_elems *elems,
- u8 *transmitter_bssid,
- u8 *bss_bssid)
+struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len,
+ bool action,
+ u64 filter, u32 crc,
+ const u8 *transmitter_bssid,
+ const u8 *bss_bssid);
+static inline struct ieee802_11_elems *
+ieee802_11_parse_elems(const u8 *start, size_t len, bool action,
+ const u8 *transmitter_bssid,
+ const u8 *bss_bssid)
{
- ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0,
- transmitter_bssid, bss_bssid);
+ return ieee802_11_parse_elems_crc(start, len, action, 0, 0,
+ transmitter_bssid, bss_bssid);
}
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 62c95597704b..9a2145c8192b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -632,17 +632,46 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
ieee80211_add_virtual_monitor(local);
}
+static void ieee80211_stop_mbssid(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_sub_if_data *tx_sdata, *non_tx_sdata, *tmp_sdata;
+ struct ieee80211_vif *tx_vif = sdata->vif.mbssid_tx_vif;
+
+ if (!tx_vif)
+ return;
+
+ tx_sdata = vif_to_sdata(tx_vif);
+ sdata->vif.mbssid_tx_vif = NULL;
+
+ list_for_each_entry_safe(non_tx_sdata, tmp_sdata,
+ &tx_sdata->local->interfaces, list) {
+ if (non_tx_sdata != sdata && non_tx_sdata != tx_sdata &&
+ non_tx_sdata->vif.mbssid_tx_vif == tx_vif &&
+ ieee80211_sdata_running(non_tx_sdata)) {
+ non_tx_sdata->vif.mbssid_tx_vif = NULL;
+ dev_close(non_tx_sdata->wdev.netdev);
+ }
+ }
+
+ if (sdata != tx_sdata && ieee80211_sdata_running(tx_sdata)) {
+ tx_sdata->vif.mbssid_tx_vif = NULL;
+ dev_close(tx_sdata->wdev.netdev);
+ }
+}
+
static int ieee80211_stop(struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- /* close all dependent VLAN interfaces before locking wiphy */
+ /* close dependent VLAN and MBSSID interfaces before locking wiphy */
if (sdata->vif.type == NL80211_IFTYPE_AP) {
struct ieee80211_sub_if_data *vlan, *tmpsdata;
list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
u.vlan.list)
dev_close(vlan->dev);
+
+ ieee80211_stop_mbssid(sdata);
}
wiphy_lock(sdata->local->hw.wiphy);
@@ -1108,9 +1137,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
* this interface, if it has the special null one.
*/
if (dev && is_zero_ether_addr(dev->dev_addr)) {
- memcpy(dev->dev_addr,
- local->hw.wiphy->perm_addr,
- ETH_ALEN);
+ eth_hw_addr_set(dev, local->hw.wiphy->perm_addr);
memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN);
if (!is_valid_ether_addr(dev->dev_addr)) {
@@ -1964,9 +1991,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ieee80211_assign_perm_addr(local, ndev->perm_addr, type);
if (is_valid_ether_addr(params->macaddr))
- memcpy(ndev->dev_addr, params->macaddr, ETH_ALEN);
+ eth_hw_addr_set(ndev, params->macaddr);
else
- memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
+ eth_hw_addr_set(ndev, ndev->perm_addr);
SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
/* don't use IEEE80211_DEV_TO_SUB_IF -- it checks too much */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 97095b7c9c64..15ac08d111ea 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -672,7 +672,7 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata,
u8 *ie, u8 ie_len)
{
struct ieee80211_supported_band *sband;
- const u8 *cap;
+ const struct element *cap;
const struct ieee80211_he_operation *he_oper = NULL;
sband = ieee80211_get_sband(sdata);
@@ -687,9 +687,10 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.he_support = true;
- cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ie, ie_len);
- if (cap && cap[1] >= ieee80211_he_oper_size(&cap[3]))
- he_oper = (void *)(cap + 3);
+ cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ie_len);
+ if (cap && cap->datalen >= 1 + sizeof(*he_oper) &&
+ cap->datalen >= 1 + ieee80211_he_oper_size(cap->data + 1))
+ he_oper = (void *)(cap->data + 1);
if (he_oper)
sdata->vif.bss_conf.he_oper.params =
@@ -1246,7 +1247,7 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
struct sk_buff *presp;
struct beacon_data *bcn;
struct ieee80211_mgmt *hdr;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
size_t baselen;
u8 *pos;
@@ -1255,22 +1256,24 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
- ieee802_11_parse_elems(pos, len - baselen, false, &elems, mgmt->bssid,
- NULL);
-
- if (!elems.mesh_id)
+ elems = ieee802_11_parse_elems(pos, len - baselen, false, mgmt->bssid,
+ NULL);
+ if (!elems)
return;
+ if (!elems->mesh_id)
+ goto free;
+
/* 802.11-2012 10.1.4.3.2 */
if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) &&
!is_broadcast_ether_addr(mgmt->da)) ||
- elems.ssid_len != 0)
- return;
+ elems->ssid_len != 0)
+ goto free;
- if (elems.mesh_id_len != 0 &&
- (elems.mesh_id_len != ifmsh->mesh_id_len ||
- memcmp(elems.mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len)))
- return;
+ if (elems->mesh_id_len != 0 &&
+ (elems->mesh_id_len != ifmsh->mesh_id_len ||
+ memcmp(elems->mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len)))
+ goto free;
rcu_read_lock();
bcn = rcu_dereference(ifmsh->beacon);
@@ -1294,6 +1297,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
ieee80211_tx_skb(sdata, presp);
out:
rcu_read_unlock();
+free:
+ kfree(elems);
}
static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
@@ -1304,7 +1309,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
struct ieee80211_channel *channel;
size_t baselen;
int freq;
@@ -1319,42 +1324,47 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
- ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
- false, &elems, mgmt->bssid, NULL);
+ elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable,
+ len - baselen,
+ false, mgmt->bssid, NULL);
+ if (!elems)
+ return;
/* ignore non-mesh or secure / unsecure mismatch */
- if ((!elems.mesh_id || !elems.mesh_config) ||
- (elems.rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) ||
- (!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))
- return;
+ if ((!elems->mesh_id || !elems->mesh_config) ||
+ (elems->rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) ||
+ (!elems->rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))
+ goto free;
- if (elems.ds_params)
- freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);
+ if (elems->ds_params)
+ freq = ieee80211_channel_to_frequency(elems->ds_params[0], band);
else
freq = rx_status->freq;
channel = ieee80211_get_channel(local->hw.wiphy, freq);
if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
- return;
+ goto free;
- if (mesh_matches_local(sdata, &elems)) {
+ if (mesh_matches_local(sdata, elems)) {
mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n",
sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal);
if (!sdata->u.mesh.user_mpm ||
sdata->u.mesh.mshcfg.rssi_threshold == 0 ||
sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal)
- mesh_neighbour_update(sdata, mgmt->sa, &elems,
+ mesh_neighbour_update(sdata, mgmt->sa, elems,
rx_status);
if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT &&
!sdata->vif.csa_active)
- ieee80211_mesh_process_chnswitch(sdata, &elems, true);
+ ieee80211_mesh_process_chnswitch(sdata, elems, true);
}
if (ifmsh->sync_ops)
- ifmsh->sync_ops->rx_bcn_presp(sdata,
- stype, mgmt, &elems, rx_status);
+ ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, len,
+ elems->mesh_config, rx_status);
+free:
+ kfree(elems);
}
int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata)
@@ -1446,7 +1456,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
u16 pre_value;
bool fwd_csa = true;
size_t baselen;
@@ -1459,33 +1469,37 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
pos = mgmt->u.action.u.chan_switch.variable;
baselen = offsetof(struct ieee80211_mgmt,
u.action.u.chan_switch.variable);
- ieee802_11_parse_elems(pos, len - baselen, true, &elems,
- mgmt->bssid, NULL);
-
- if (!mesh_matches_local(sdata, &elems))
+ elems = ieee802_11_parse_elems(pos, len - baselen, true,
+ mgmt->bssid, NULL);
+ if (!elems)
return;
- ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl;
+ if (!mesh_matches_local(sdata, elems))
+ goto free;
+
+ ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl;
if (!--ifmsh->chsw_ttl)
fwd_csa = false;
- pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value);
+ pre_value = le16_to_cpu(elems->mesh_chansw_params_ie->mesh_pre_value);
if (ifmsh->pre_value >= pre_value)
- return;
+ goto free;
ifmsh->pre_value = pre_value;
if (!sdata->vif.csa_active &&
- !ieee80211_mesh_process_chnswitch(sdata, &elems, false)) {
+ !ieee80211_mesh_process_chnswitch(sdata, elems, false)) {
mcsa_dbg(sdata, "Failed to process CSA action frame");
- return;
+ goto free;
}
/* forward or re-broadcast the CSA frame */
if (fwd_csa) {
- if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0)
+ if (mesh_fwd_csa_frame(sdata, mgmt, len, elems) < 0)
mcsa_dbg(sdata, "Failed to forward the CSA frame");
}
+free:
+ kfree(elems);
}
static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index a05b615deb51..44a6fdb6efbd 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019, 2021 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
@@ -908,7 +908,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len)
{
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
size_t baselen;
u32 path_metric;
struct sta_info *sta;
@@ -926,37 +926,41 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;
- ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
- len - baselen, false, &elems, mgmt->bssid, NULL);
+ elems = ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
+ len - baselen, false, mgmt->bssid, NULL);
+ if (!elems)
+ return;
- if (elems.preq) {
- if (elems.preq_len != 37)
+ if (elems->preq) {
+ if (elems->preq_len != 37)
/* Right now we support just 1 destination and no AE */
- return;
- path_metric = hwmp_route_info_get(sdata, mgmt, elems.preq,
+ goto free;
+ path_metric = hwmp_route_info_get(sdata, mgmt, elems->preq,
MPATH_PREQ);
if (path_metric)
- hwmp_preq_frame_process(sdata, mgmt, elems.preq,
+ hwmp_preq_frame_process(sdata, mgmt, elems->preq,
path_metric);
}
- if (elems.prep) {
- if (elems.prep_len != 31)
+ if (elems->prep) {
+ if (elems->prep_len != 31)
/* Right now we support no AE */
- return;
- path_metric = hwmp_route_info_get(sdata, mgmt, elems.prep,
+ goto free;
+ path_metric = hwmp_route_info_get(sdata, mgmt, elems->prep,
MPATH_PREP);
if (path_metric)
- hwmp_prep_frame_process(sdata, mgmt, elems.prep,
+ hwmp_prep_frame_process(sdata, mgmt, elems->prep,
path_metric);
}
- if (elems.perr) {
- if (elems.perr_len != 15)
+ if (elems->perr) {
+ if (elems->perr_len != 15)
/* Right now we support only one destination per PERR */
- return;
- hwmp_perr_frame_process(sdata, mgmt, elems.perr);
+ goto free;
+ hwmp_perr_frame_process(sdata, mgmt, elems->perr);
}
- if (elems.rann)
- hwmp_rann_frame_process(sdata, mgmt, elems.rann);
+ if (elems->rann)
+ hwmp_rann_frame_process(sdata, mgmt, elems->rann);
+free:
+ kfree(elems);
}
/**
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index efbefcbac3ac..7cab1cf09bf1 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -60,7 +60,10 @@ static struct mesh_table *mesh_table_alloc(void)
atomic_set(&newtbl->entries, 0);
spin_lock_init(&newtbl->gates_lock);
spin_lock_init(&newtbl->walk_lock);
- rhashtable_init(&newtbl->rhead, &mesh_rht_params);
+ if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) {
+ kfree(newtbl);
+ return NULL;
+ }
return newtbl;
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index a6915847d78a..a829470dd59e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019, 2021 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
#include <linux/gfp.h>
@@ -1200,7 +1200,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len,
struct ieee80211_rx_status *rx_status)
{
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
size_t baselen;
u8 *baseaddr;
@@ -1228,7 +1228,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
}
- ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems,
- mgmt->bssid, NULL);
- mesh_process_plink_frame(sdata, mgmt, &elems, rx_status);
+ elems = ieee802_11_parse_elems(baseaddr, len - baselen, true,
+ mgmt->bssid, NULL);
+ mesh_process_plink_frame(sdata, mgmt, elems, rx_status);
+ kfree(elems);
}
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 204830a55240..3fbd0b9ff913 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -2,6 +2,7 @@
/*
* Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
* Copyright 2012-2013, cozybit Inc.
+ * Copyright (C) 2021 Intel Corporation
*/
#include "mesh.h"
@@ -588,7 +589,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta,
/* only transmit to PS STA with announced, non-zero awake window */
if (test_sta_flag(sta, WLAN_STA_PS_STA) &&
- (!elems->awake_window || !le16_to_cpu(*elems->awake_window)))
+ (!elems->awake_window || !get_unaligned_le16(elems->awake_window)))
return;
if (!test_sta_flag(sta, WLAN_STA_MPSP_OWNER))
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index fde93de2b80a..9e342cc2504c 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -3,6 +3,7 @@
* Copyright 2011-2012, Pavel Zubarev <pavel.zubarev@gmail.com>
* Copyright 2011-2012, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
* Copyright 2011-2012, cozybit Inc.
+ * Copyright (C) 2021 Intel Corporation
*/
#include "ieee80211_i.h"
@@ -35,12 +36,12 @@ struct sync_method {
/**
* mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT
*
- * @ie: information elements of a management frame from the mesh peer
+ * @cfg: mesh config element from the mesh peer (or %NULL)
*/
-static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie)
+static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg)
{
- return (ie->mesh_config->meshconf_cap &
- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0;
+ return cfg &&
+ (cfg->meshconf_cap & IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING);
}
void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata)
@@ -76,11 +77,11 @@ void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata)
}
}
-static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
- u16 stype,
- struct ieee80211_mgmt *mgmt,
- struct ieee802_11_elems *elems,
- struct ieee80211_rx_status *rx_status)
+static void
+mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype,
+ struct ieee80211_mgmt *mgmt, unsigned int len,
+ const struct ieee80211_meshconf_ie *mesh_cfg,
+ struct ieee80211_rx_status *rx_status)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_local *local = sdata->local;
@@ -101,10 +102,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
*/
if (ieee80211_have_rx_timestamp(rx_status))
t_r = ieee80211_calculate_rx_timestamp(local, rx_status,
- 24 + 12 +
- elems->total_len +
- FCS_LEN,
- 24);
+ len + FCS_LEN, 24);
else
t_r = drv_get_tsf(local, sdata);
@@ -119,7 +117,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
* dot11MeshNbrOffsetMaxNeighbor non-peer non-MBSS neighbors
*/
- if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) {
+ if (mesh_peer_tbtt_adjusting(mesh_cfg)) {
msync_dbg(sdata, "STA %pM : is adjusting TBTT\n",
sta->sta.addr);
goto no_sync;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c0ea3b1aa9e1..54ab0e1ef6ca 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1490,6 +1490,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
fallthrough;
case NL80211_BAND_2GHZ:
case NL80211_BAND_60GHZ:
+ case NL80211_BAND_LC:
chan_increment = 1;
break;
case NL80211_BAND_5GHZ:
@@ -2258,6 +2259,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
u32 changed = 0;
struct ieee80211_prep_tx_info info = {
.subtype = stype,
@@ -2407,6 +2409,10 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
cancel_delayed_work_sync(&ifmgd->tx_tspec_wk);
sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
+
+ bss_conf->pwr_reduction = 0;
+ bss_conf->tx_pwr_env_num = 0;
+ memset(bss_conf->tx_pwr_env, 0, sizeof(bss_conf->tx_pwr_env));
}
static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
@@ -2509,7 +2515,7 @@ static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata,
static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- const u8 *ssid;
+ const struct element *ssid;
u8 *dst = ifmgd->associated->bssid;
u8 unicast_limit = max(1, max_probe_tries - 3);
struct sta_info *sta;
@@ -2546,14 +2552,14 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
int ssid_len;
rcu_read_lock();
- ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
+ ssid = ieee80211_bss_get_elem(ifmgd->associated, WLAN_EID_SSID);
if (WARN_ON_ONCE(ssid == NULL))
ssid_len = 0;
else
- ssid_len = ssid[1];
+ ssid_len = ssid->datalen;
ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst,
- ssid + 2, ssid_len,
+ ssid->data, ssid_len,
ifmgd->associated->channel);
rcu_read_unlock();
}
@@ -2583,6 +2589,13 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
goto out;
}
+ if (sdata->local->suspending) {
+ /* reschedule after resume */
+ mutex_unlock(&sdata->local->mtx);
+ ieee80211_reset_ap_probe(sdata);
+ goto out;
+ }
+
if (beacon) {
mlme_dbg_ratelimited(sdata,
"detected beacon loss from AP (missed %d beacons) - probing\n",
@@ -2629,7 +2642,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct cfg80211_bss *cbss;
struct sk_buff *skb;
- const u8 *ssid;
+ const struct element *ssid;
int ssid_len;
if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
@@ -2647,16 +2660,17 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
return NULL;
rcu_read_lock();
- ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID);
- if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN,
- "invalid SSID element (len=%d)", ssid ? ssid[1] : -1))
+ ssid = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
+ if (WARN_ONCE(!ssid || ssid->datalen > IEEE80211_MAX_SSID_LEN,
+ "invalid SSID element (len=%d)",
+ ssid ? ssid->datalen : -1))
ssid_len = 0;
else
- ssid_len = ssid[1];
+ ssid_len = ssid->datalen;
skb = ieee80211_build_probe_req(sdata, sdata->vif.addr, cbss->bssid,
(u32) -1, cbss->channel,
- ssid + 2, ssid_len,
+ ssid->data, ssid_len,
NULL, 0, IEEE80211_PROBE_FLAG_DIRECTED);
rcu_read_unlock();
@@ -2870,17 +2884,17 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data;
+ const struct element *challenge;
u8 *pos;
- struct ieee802_11_elems elems;
u32 tx_flags = 0;
struct ieee80211_prep_tx_info info = {
.subtype = IEEE80211_STYPE_AUTH,
};
pos = mgmt->u.auth.variable;
- ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
- mgmt->bssid, auth_data->bss->bssid);
- if (!elems.challenge)
+ challenge = cfg80211_find_elem(WLAN_EID_CHALLENGE, pos,
+ len - (pos - (u8 *)mgmt));
+ if (!challenge)
return;
auth_data->expected_transaction = 4;
drv_mgd_prepare_tx(sdata->local, sdata, &info);
@@ -2888,7 +2902,8 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0,
- elems.challenge - 2, elems.challenge_len + 2,
+ (void *)challenge,
+ challenge->datalen + sizeof(*challenge),
auth_data->bss->bssid, auth_data->bss->bssid,
auth_data->key, auth_data->key_len,
auth_data->key_idx, tx_flags);
@@ -3290,8 +3305,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
aid = 0; /* TODO */
}
capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
- ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, elems,
- mgmt->bssid, assoc_data->bss->bssid);
+ elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false,
+ mgmt->bssid, assoc_data->bss->bssid);
+
+ if (!elems)
+ return false;
if (elems->aid_resp)
aid = le16_to_cpu(elems->aid_resp->aid);
@@ -3313,7 +3331,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
if (!is_s1g && !elems->supp_rates) {
sdata_info(sdata, "no SuppRates element in AssocResp\n");
- return false;
+ ret = false;
+ goto out;
}
sdata->vif.bss_conf.aid = aid;
@@ -3335,7 +3354,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
(!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
(!elems->vht_cap_elem || !elems->vht_operation)))) {
const struct cfg80211_bss_ies *ies;
- struct ieee802_11_elems bss_elems;
+ struct ieee802_11_elems *bss_elems;
rcu_read_lock();
ies = rcu_dereference(cbss->ies);
@@ -3343,16 +3362,22 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
bss_ies = kmemdup(ies, sizeof(*ies) + ies->len,
GFP_ATOMIC);
rcu_read_unlock();
- if (!bss_ies)
- return false;
+ if (!bss_ies) {
+ ret = false;
+ goto out;
+ }
+
+ bss_elems = ieee802_11_parse_elems(bss_ies->data, bss_ies->len,
+ false, mgmt->bssid,
+ assoc_data->bss->bssid);
+ if (!bss_elems) {
+ ret = false;
+ goto out;
+ }
- ieee802_11_parse_elems(bss_ies->data, bss_ies->len,
- false, &bss_elems,
- mgmt->bssid,
- assoc_data->bss->bssid);
if (assoc_data->wmm &&
- !elems->wmm_param && bss_elems.wmm_param) {
- elems->wmm_param = bss_elems.wmm_param;
+ !elems->wmm_param && bss_elems->wmm_param) {
+ elems->wmm_param = bss_elems->wmm_param;
sdata_info(sdata,
"AP bug: WMM param missing from AssocResp\n");
}
@@ -3361,30 +3386,32 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* Also check if we requested HT/VHT, otherwise the AP doesn't
* have to include the IEs in the (re)association response.
*/
- if (!elems->ht_cap_elem && bss_elems.ht_cap_elem &&
+ if (!elems->ht_cap_elem && bss_elems->ht_cap_elem &&
!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
- elems->ht_cap_elem = bss_elems.ht_cap_elem;
+ elems->ht_cap_elem = bss_elems->ht_cap_elem;
sdata_info(sdata,
"AP bug: HT capability missing from AssocResp\n");
}
- if (!elems->ht_operation && bss_elems.ht_operation &&
+ if (!elems->ht_operation && bss_elems->ht_operation &&
!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
- elems->ht_operation = bss_elems.ht_operation;
+ elems->ht_operation = bss_elems->ht_operation;
sdata_info(sdata,
"AP bug: HT operation missing from AssocResp\n");
}
- if (!elems->vht_cap_elem && bss_elems.vht_cap_elem &&
+ if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
- elems->vht_cap_elem = bss_elems.vht_cap_elem;
+ elems->vht_cap_elem = bss_elems->vht_cap_elem;
sdata_info(sdata,
"AP bug: VHT capa missing from AssocResp\n");
}
- if (!elems->vht_operation && bss_elems.vht_operation &&
+ if (!elems->vht_operation && bss_elems->vht_operation &&
!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
- elems->vht_operation = bss_elems.vht_operation;
+ elems->vht_operation = bss_elems->vht_operation;
sdata_info(sdata,
"AP bug: VHT operation missing from AssocResp\n");
}
+
+ kfree(bss_elems);
}
/*
@@ -3629,6 +3656,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
ret = true;
out:
+ kfree(elems);
kfree(bss_ies);
return ret;
}
@@ -3640,7 +3668,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
u16 capab_info, status_code, aid;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
int ac, uapsd_queues = -1;
u8 *pos;
bool reassoc;
@@ -3697,14 +3725,16 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0)
return;
- ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
- mgmt->bssid, assoc_data->bss->bssid);
+ elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false,
+ mgmt->bssid, assoc_data->bss->bssid);
+ if (!elems)
+ goto notify_driver;
if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
- elems.timeout_int &&
- elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) {
+ elems->timeout_int &&
+ elems->timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) {
u32 tu, ms;
- tu = le32_to_cpu(elems.timeout_int->value);
+ tu = le32_to_cpu(elems->timeout_int->value);
ms = tu * 1024 / 1000;
sdata_info(sdata,
"%pM rejected association temporarily; comeback duration %u TU (%u ms)\n",
@@ -3724,7 +3754,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
event.u.mlme.reason = status_code;
drv_event_callback(sdata->local, sdata, &event);
} else {
- if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, &elems)) {
+ if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, elems)) {
/* oops -- internal error -- send timeout for now */
ieee80211_destroy_assoc_data(sdata, false, false);
cfg80211_assoc_timeout(sdata->dev, cbss);
@@ -3754,6 +3784,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len);
notify_driver:
drv_mgd_complete_tx(sdata->local, sdata, &info);
+ kfree(elems);
}
static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
@@ -3958,7 +3989,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
struct ieee80211_mgmt *mgmt = (void *) hdr;
size_t baselen;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_channel *chan;
@@ -4004,15 +4035,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
ieee80211_rx_our_beacon(bssid, ifmgd->assoc_data->bss)) {
- ieee802_11_parse_elems(variable,
- len - baselen, false, &elems,
- bssid,
- ifmgd->assoc_data->bss->bssid);
+ elems = ieee802_11_parse_elems(variable, len - baselen, false,
+ bssid,
+ ifmgd->assoc_data->bss->bssid);
+ if (!elems)
+ return;
ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
- if (elems.dtim_period)
- ifmgd->dtim_period = elems.dtim_period;
+ if (elems->dtim_period)
+ ifmgd->dtim_period = elems->dtim_period;
ifmgd->have_beacon = true;
ifmgd->assoc_data->need_beacon = false;
if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
@@ -4020,17 +4052,17 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
le64_to_cpu(mgmt->u.beacon.timestamp);
sdata->vif.bss_conf.sync_device_ts =
rx_status->device_timestamp;
- sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count;
+ sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count;
}
- if (elems.mbssid_config_ie)
+ if (elems->mbssid_config_ie)
bss_conf->profile_periodicity =
- elems.mbssid_config_ie->profile_periodicity;
+ elems->mbssid_config_ie->profile_periodicity;
else
bss_conf->profile_periodicity = 0;
- if (elems.ext_capab_len >= 11 &&
- (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+ if (elems->ext_capab_len >= 11 &&
+ (elems->ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
bss_conf->ema_ap = true;
else
bss_conf->ema_ap = false;
@@ -4039,6 +4071,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
ifmgd->assoc_data->timeout = jiffies;
ifmgd->assoc_data->timeout_started = true;
run_again(sdata, ifmgd->assoc_data->timeout);
+ kfree(elems);
return;
}
@@ -4070,13 +4103,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
*/
if (!ieee80211_is_s1g_beacon(hdr->frame_control))
ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
- ncrc = ieee802_11_parse_elems_crc(variable,
- len - baselen, false, &elems,
- care_about_ies, ncrc,
- mgmt->bssid, bssid);
+ elems = ieee802_11_parse_elems_crc(variable, len - baselen,
+ false, care_about_ies, ncrc,
+ mgmt->bssid, bssid);
+ if (!elems)
+ return;
+ ncrc = elems->crc;
if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
- ieee80211_check_tim(elems.tim, elems.tim_len, bss_conf->aid)) {
+ ieee80211_check_tim(elems->tim, elems->tim_len, bss_conf->aid)) {
if (local->hw.conf.dynamic_ps_timeout > 0) {
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
local->hw.conf.flags &= ~IEEE80211_CONF_PS;
@@ -4146,12 +4181,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
le64_to_cpu(mgmt->u.beacon.timestamp);
sdata->vif.bss_conf.sync_device_ts =
rx_status->device_timestamp;
- sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count;
+ sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count;
}
if ((ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) ||
ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- return;
+ goto free;
ifmgd->beacon_crc = ncrc;
ifmgd->beacon_crc_valid = true;
@@ -4159,12 +4194,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
rx_status->device_timestamp,
- &elems, true);
+ elems, true);
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) &&
- ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
- elems.wmm_param_len,
- elems.mu_edca_param_set))
+ ieee80211_sta_wmm_params(local, sdata, elems->wmm_param,
+ elems->wmm_param_len,
+ elems->mu_edca_param_set))
changed |= BSS_CHANGED_QOS;
/*
@@ -4173,7 +4208,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
*/
if (!ifmgd->have_beacon) {
/* a few bogus AP send dtim_period = 0 or no TIM IE */
- bss_conf->dtim_period = elems.dtim_period ?: 1;
+ bss_conf->dtim_period = elems->dtim_period ?: 1;
changed |= BSS_CHANGED_BEACON_INFO;
ifmgd->have_beacon = true;
@@ -4185,9 +4220,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
ieee80211_recalc_ps_vif(sdata);
}
- if (elems.erp_info) {
+ if (elems->erp_info) {
erp_valid = true;
- erp_value = elems.erp_info[0];
+ erp_value = elems->erp_info[0];
} else {
erp_valid = false;
}
@@ -4200,12 +4235,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, bssid);
- changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
+ changed |= ieee80211_recalc_twt_req(sdata, sta, elems);
- if (ieee80211_config_bw(sdata, sta, elems.ht_cap_elem,
- elems.vht_cap_elem, elems.ht_operation,
- elems.vht_operation, elems.he_operation,
- elems.s1g_oper, bssid, &changed)) {
+ if (ieee80211_config_bw(sdata, sta, elems->ht_cap_elem,
+ elems->vht_cap_elem, elems->ht_operation,
+ elems->vht_operation, elems->he_operation,
+ elems->s1g_oper, bssid, &changed)) {
mutex_unlock(&local->sta_mtx);
sdata_info(sdata,
"failed to follow AP %pM bandwidth change, disconnect\n",
@@ -4217,21 +4252,23 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
sizeof(deauth_buf), true,
WLAN_REASON_DEAUTH_LEAVING,
false);
- return;
+ goto free;
}
- if (sta && elems.opmode_notif)
- ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif,
+ if (sta && elems->opmode_notif)
+ ieee80211_vht_handle_opmode(sdata, sta, *elems->opmode_notif,
rx_status->band);
mutex_unlock(&local->sta_mtx);
changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt,
- elems.country_elem,
- elems.country_elem_len,
- elems.pwr_constr_elem,
- elems.cisco_dtpc_elem);
+ elems->country_elem,
+ elems->country_elem_len,
+ elems->pwr_constr_elem,
+ elems->cisco_dtpc_elem);
ieee80211_bss_info_change_notify(sdata, changed);
+free:
+ kfree(elems);
}
void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
@@ -4260,7 +4297,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct ieee80211_rx_status *rx_status;
struct ieee80211_mgmt *mgmt;
u16 fc;
- struct ieee802_11_elems elems;
int ies_len;
rx_status = (struct ieee80211_rx_status *) skb->cb;
@@ -4292,6 +4328,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
break;
case IEEE80211_STYPE_ACTION:
if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) {
+ struct ieee802_11_elems *elems;
+
ies_len = skb->len -
offsetof(struct ieee80211_mgmt,
u.action.u.chan_switch.variable);
@@ -4300,18 +4338,19 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
break;
/* CSA IE cannot be overridden, no need for BSSID */
- ieee802_11_parse_elems(
- mgmt->u.action.u.chan_switch.variable,
- ies_len, true, &elems, mgmt->bssid, NULL);
-
- if (elems.parse_error)
- break;
-
- ieee80211_sta_process_chanswitch(sdata,
- rx_status->mactime,
- rx_status->device_timestamp,
- &elems, false);
+ elems = ieee802_11_parse_elems(
+ mgmt->u.action.u.chan_switch.variable,
+ ies_len, true, mgmt->bssid, NULL);
+
+ if (elems && !elems->parse_error)
+ ieee80211_sta_process_chanswitch(sdata,
+ rx_status->mactime,
+ rx_status->device_timestamp,
+ elems, false);
+ kfree(elems);
} else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) {
+ struct ieee802_11_elems *elems;
+
ies_len = skb->len -
offsetof(struct ieee80211_mgmt,
u.action.u.ext_chan_switch.variable);
@@ -4323,21 +4362,22 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
* extended CSA IE can't be overridden, no need for
* BSSID
*/
- ieee802_11_parse_elems(
- mgmt->u.action.u.ext_chan_switch.variable,
- ies_len, true, &elems, mgmt->bssid, NULL);
-
- if (elems.parse_error)
- break;
-
- /* for the handling code pretend this was also an IE */
- elems.ext_chansw_ie =
- &mgmt->u.action.u.ext_chan_switch.data;
+ elems = ieee802_11_parse_elems(
+ mgmt->u.action.u.ext_chan_switch.variable,
+ ies_len, true, mgmt->bssid, NULL);
+
+ if (elems && !elems->parse_error) {
+ /* for the handling code pretend it was an IE */
+ elems->ext_chansw_ie =
+ &mgmt->u.action.u.ext_chan_switch.data;
+
+ ieee80211_sta_process_chanswitch(sdata,
+ rx_status->mactime,
+ rx_status->device_timestamp,
+ elems, false);
+ }
- ieee80211_sta_process_chanswitch(sdata,
- rx_status->mactime,
- rx_status->device_timestamp,
- &elems, false);
+ kfree(elems);
}
break;
}
@@ -4972,10 +5012,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
struct ieee80211_bss *bss = (void *)cbss->priv;
+ struct ieee802_11_elems *elems;
+ const struct cfg80211_bss_ies *ies;
int ret;
u32 i;
bool have_80mhz;
+ rcu_read_lock();
+
+ ies = rcu_dereference(cbss->ies);
+ elems = ieee802_11_parse_elems(ies->data, ies->len, false,
+ NULL, NULL);
+ if (!elems) {
+ rcu_read_unlock();
+ return -ENOMEM;
+ }
+
sband = local->hw.wiphy->bands[cbss->channel->band];
ifmgd->flags &= ~(IEEE80211_STA_DISABLE_40MHZ |
@@ -4998,18 +5050,9 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ieee80211_vif_type_p2p(&sdata->vif)))
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
- rcu_read_lock();
-
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && !is_6ghz) {
- const u8 *ht_oper_ie, *ht_cap_ie;
-
- ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION);
- if (ht_oper_ie && ht_oper_ie[1] >= sizeof(*ht_oper))
- ht_oper = (void *)(ht_oper_ie + 2);
-
- ht_cap_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY);
- if (ht_cap_ie && ht_cap_ie[1] >= sizeof(*ht_cap))
- ht_cap = (void *)(ht_cap_ie + 2);
+ ht_oper = elems->ht_operation;
+ ht_cap = elems->ht_cap_elem;
if (!ht_cap) {
ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
@@ -5018,12 +5061,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
}
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && !is_6ghz) {
- const u8 *vht_oper_ie, *vht_cap;
-
- vht_oper_ie = ieee80211_bss_get_ie(cbss,
- WLAN_EID_VHT_OPERATION);
- if (vht_oper_ie && vht_oper_ie[1] >= sizeof(*vht_oper))
- vht_oper = (void *)(vht_oper_ie + 2);
+ vht_oper = elems->vht_operation;
if (vht_oper && !ht_oper) {
vht_oper = NULL;
sdata_info(sdata,
@@ -5033,25 +5071,38 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
}
- vht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_CAPABILITY);
- if (!vht_cap || vht_cap[1] < sizeof(struct ieee80211_vht_cap)) {
+ if (!elems->vht_cap_elem) {
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
vht_oper = NULL;
}
}
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) {
- const struct cfg80211_bss_ies *ies;
- const u8 *he_oper_ie;
+ he_oper = elems->he_operation;
- ies = rcu_dereference(cbss->ies);
- he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION,
- ies->data, ies->len);
- if (he_oper_ie &&
- he_oper_ie[1] >= ieee80211_he_oper_size(&he_oper_ie[3]))
- he_oper = (void *)(he_oper_ie + 3);
- else
- he_oper = NULL;
+ if (is_6ghz) {
+ struct ieee80211_bss_conf *bss_conf;
+ u8 i, j = 0;
+
+ bss_conf = &sdata->vif.bss_conf;
+
+ if (elems->pwr_constr_elem)
+ bss_conf->pwr_reduction = *elems->pwr_constr_elem;
+
+ BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) !=
+ ARRAY_SIZE(elems->tx_pwr_env));
+
+ for (i = 0; i < elems->tx_pwr_env_num; i++) {
+ if (elems->tx_pwr_env_len[i] >
+ sizeof(bss_conf->tx_pwr_env[j]))
+ continue;
+
+ bss_conf->tx_pwr_env_num++;
+ memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i],
+ elems->tx_pwr_env_len[i]);
+ j++;
+ }
+ }
if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper))
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
@@ -5072,13 +5123,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
if (sband->band == NL80211_BAND_S1GHZ) {
- const u8 *s1g_oper_ie;
-
- s1g_oper_ie = ieee80211_bss_get_ie(cbss,
- WLAN_EID_S1G_OPERATION);
- if (s1g_oper_ie && s1g_oper_ie[1] >= sizeof(*s1g_oper))
- s1g_oper = (void *)(s1g_oper_ie + 2);
- else
+ s1g_oper = elems->s1g_oper;
+ if (!s1g_oper)
sdata_info(sdata,
"AP missing S1G operation element?\n");
}
@@ -5094,6 +5140,9 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
local->rx_chains);
rcu_read_unlock();
+ /* the element data was RCU protected so no longer valid anyway */
+ kfree(elems);
+ elems = NULL;
if (ifmgd->flags & IEEE80211_STA_DISABLE_HE && is_6ghz) {
sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection");
@@ -5498,7 +5547,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_bss_ies *beacon_ies;
struct ieee80211_supported_band *sband;
struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
- const u8 *ssidie, *ht_ie, *vht_ie;
+ const struct element *ssid_elem, *ht_elem, *vht_elem;
int i, err;
bool override = false;
@@ -5507,14 +5556,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
return -ENOMEM;
rcu_read_lock();
- ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID);
- if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) {
+ ssid_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_SSID);
+ if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
kfree(assoc_data);
return -EINVAL;
}
- memcpy(assoc_data->ssid, ssidie + 2, ssidie[1]);
- assoc_data->ssid_len = ssidie[1];
+ memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen);
+ assoc_data->ssid_len = ssid_elem->datalen;
memcpy(bss_conf->ssid, assoc_data->ssid, assoc_data->ssid_len);
bss_conf->ssid_len = assoc_data->ssid_len;
rcu_read_unlock();
@@ -5628,15 +5677,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
assoc_data->supp_rates_len = bss->supp_rates_len;
rcu_read_lock();
- ht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_OPERATION);
- if (ht_ie && ht_ie[1] >= sizeof(struct ieee80211_ht_operation))
+ ht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_HT_OPERATION);
+ if (ht_elem && ht_elem->datalen >= sizeof(struct ieee80211_ht_operation))
assoc_data->ap_ht_param =
- ((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param;
+ ((struct ieee80211_ht_operation *)(ht_elem->data))->ht_param;
else if (!is_6ghz)
ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY);
- if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap))
- memcpy(&assoc_data->ap_vht_cap, vht_ie + 2,
+ vht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_VHT_CAPABILITY);
+ if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap))
+ memcpy(&assoc_data->ap_vht_cap, vht_elem->data,
sizeof(struct ieee80211_vht_cap));
else if (is_5ghz)
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT |
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 7809a906d7fe..0ccb5701c7f3 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -27,6 +27,9 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
if (!local->open_count)
goto suspend;
+ local->suspending = true;
+ mb(); /* make suspending visible before any cancellation */
+
ieee80211_scan_cancel(local);
ieee80211_dfs_cac_cancel(local);
@@ -176,6 +179,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
/* need suspended to be visible before quiescing is false */
barrier();
local->quiescing = false;
+ local->suspending = false;
return 0;
}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index e5935e3d7a07..8c6416129d5b 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -392,10 +392,6 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta,
int mcast_rate;
bool use_basicrate = false;
- if (ieee80211_is_tx_data(txrc->skb) &&
- info->flags & IEEE80211_TX_CTL_NO_ACK)
- return false;
-
if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
__rate_control_send_low(txrc->hw, sband, pubsta, info,
txrc->rate_idx_mask);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 99ed68f7dc36..fc5c608d02e2 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3216,10 +3216,7 @@ static bool
ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx)
{
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)rx->skb->data;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
struct ieee80211_sub_if_data *sdata = rx->sdata;
- const struct ieee80211_sta_he_cap *hecap;
- struct ieee80211_supported_band *sband;
/* TWT actions are only supported in AP for the moment */
if (sdata->vif.type != NL80211_IFTYPE_AP)
@@ -3228,14 +3225,7 @@ ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx)
if (!rx->local->ops->add_twt_setup)
return false;
- sband = rx->local->hw.wiphy->bands[status->band];
- hecap = ieee80211_get_he_iftype_cap(sband,
- ieee80211_vif_type_p2p(&sdata->vif));
- if (!hecap)
- return false;
-
- if (!(hecap->he_cap_elem.mac_cap_info[0] &
- IEEE80211_HE_MAC_CAP0_TWT_RES))
+ if (!sdata->vif.bss_conf.twt_responder)
return false;
if (!rx->sta)
@@ -4131,7 +4121,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
if (!bssid)
return false;
if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
- ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+ ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) ||
+ !is_valid_ether_addr(hdr->addr2))
return false;
if (ieee80211_is_beacon(hdr->frame_control))
return true;
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index 7e35ab5b6166..4141bc80cdfd 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -104,9 +104,11 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata,
/* broadcast TWT not supported yet */
if (twt->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) {
- le16p_replace_bits(&twt_agrt->req_type,
- TWT_SETUP_CMD_REJECT,
- IEEE80211_TWT_REQTYPE_SETUP_CMD);
+ twt_agrt->req_type &=
+ ~cpu_to_le16(IEEE80211_TWT_REQTYPE_SETUP_CMD);
+ twt_agrt->req_type |=
+ le16_encode_bits(TWT_SETUP_CMD_REJECT,
+ IEEE80211_TWT_REQTYPE_SETUP_CMD);
goto out;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 6b50cb5e0e3c..5e6b275afc9e 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/if_arp.h>
@@ -155,7 +155,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
};
bool signal_valid;
struct ieee80211_sub_if_data *scan_sdata;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
size_t baselen;
u8 *elements;
@@ -209,8 +209,10 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
if (baselen > len)
return NULL;
- ieee802_11_parse_elems(elements, len - baselen, false, &elems,
- mgmt->bssid, cbss->bssid);
+ elems = ieee802_11_parse_elems(elements, len - baselen, false,
+ mgmt->bssid, cbss->bssid);
+ if (!elems)
+ return NULL;
/* In case the signal is invalid update the status */
signal_valid = channel == cbss->channel;
@@ -218,15 +220,17 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL;
bss = (void *)cbss->priv;
- ieee80211_update_bss_from_elems(local, bss, &elems, rx_status, beacon);
+ ieee80211_update_bss_from_elems(local, bss, elems, rx_status, beacon);
list_for_each_entry(non_tx_cbss, &cbss->nontrans_list, nontrans_list) {
non_tx_bss = (void *)non_tx_cbss->priv;
- ieee80211_update_bss_from_elems(local, non_tx_bss, &elems,
+ ieee80211_update_bss_from_elems(local, non_tx_bss, elems,
rx_status, beacon);
}
+ kfree(elems);
+
return bss;
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 2b5acb37587f..51b49f0d3ad4 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -444,6 +444,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
switch (i) {
case NL80211_BAND_2GHZ:
+ case NL80211_BAND_LC:
/*
* We use both here, even if we cannot really know for
* sure the station will support both, but the only use
@@ -513,6 +514,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->cparams.target = MS2TIME(20);
sta->cparams.interval = MS2TIME(100);
sta->cparams.ecn = true;
+ sta->cparams.ce_threshold_selector = 0;
+ sta->cparams.ce_threshold_mask = 0;
sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 45e532ad1215..137be9ec94af 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -6,7 +6,7 @@
* Copyright 2014, Intel Corporation
* Copyright 2014 Intel Mobile Communications GmbH
* Copyright 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019, 2021 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -1684,7 +1684,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
struct ieee80211_local *local = sdata->local;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems = NULL;
struct sta_info *sta;
struct ieee80211_tdls_data *tf = (void *)skb->data;
bool local_initiator;
@@ -1718,16 +1718,20 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
goto call_drv;
}
- ieee802_11_parse_elems(tf->u.chan_switch_resp.variable,
- skb->len - baselen, false, &elems,
- NULL, NULL);
- if (elems.parse_error) {
+ elems = ieee802_11_parse_elems(tf->u.chan_switch_resp.variable,
+ skb->len - baselen, false, NULL, NULL);
+ if (!elems) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (elems->parse_error) {
tdls_dbg(sdata, "Invalid IEs in TDLS channel switch resp\n");
ret = -EINVAL;
goto out;
}
- if (!elems.ch_sw_timing || !elems.lnk_id) {
+ if (!elems->ch_sw_timing || !elems->lnk_id) {
tdls_dbg(sdata, "TDLS channel switch resp - missing IEs\n");
ret = -EINVAL;
goto out;
@@ -1735,15 +1739,15 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
/* validate the initiator is set correctly */
local_initiator =
- !memcmp(elems.lnk_id->init_sta, sdata->vif.addr, ETH_ALEN);
+ !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN);
if (local_initiator == sta->sta.tdls_initiator) {
tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n");
ret = -EINVAL;
goto out;
}
- params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time);
- params.switch_timeout = le16_to_cpu(elems.ch_sw_timing->switch_timeout);
+ params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time);
+ params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout);
params.tmpl_skb =
ieee80211_tdls_ch_sw_resp_tmpl_get(sta, &params.ch_sw_tm_ie);
@@ -1763,6 +1767,7 @@ call_drv:
out:
mutex_unlock(&local->sta_mtx);
dev_kfree_skb_any(params.tmpl_skb);
+ kfree(elems);
return ret;
}
@@ -1771,7 +1776,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
struct ieee80211_local *local = sdata->local;
- struct ieee802_11_elems elems;
+ struct ieee802_11_elems *elems;
struct cfg80211_chan_def chandef;
struct ieee80211_channel *chan;
enum nl80211_channel_type chan_type;
@@ -1831,22 +1836,27 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
return -EINVAL;
}
- ieee802_11_parse_elems(tf->u.chan_switch_req.variable,
- skb->len - baselen, false, &elems, NULL, NULL);
- if (elems.parse_error) {
+ elems = ieee802_11_parse_elems(tf->u.chan_switch_req.variable,
+ skb->len - baselen, false, NULL, NULL);
+ if (!elems)
+ return -ENOMEM;
+
+ if (elems->parse_error) {
tdls_dbg(sdata, "Invalid IEs in TDLS channel switch req\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto free;
}
- if (!elems.ch_sw_timing || !elems.lnk_id) {
+ if (!elems->ch_sw_timing || !elems->lnk_id) {
tdls_dbg(sdata, "TDLS channel switch req - missing IEs\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto free;
}
- if (!elems.sec_chan_offs) {
+ if (!elems->sec_chan_offs) {
chan_type = NL80211_CHAN_HT20;
} else {
- switch (elems.sec_chan_offs->sec_chan_offs) {
+ switch (elems->sec_chan_offs->sec_chan_offs) {
case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
chan_type = NL80211_CHAN_HT40PLUS;
break;
@@ -1865,7 +1875,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
if (!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &chandef,
sdata->wdev.iftype)) {
tdls_dbg(sdata, "TDLS chan switch to forbidden channel\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto free;
}
mutex_lock(&local->sta_mtx);
@@ -1881,7 +1892,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
/* validate the initiator is set correctly */
local_initiator =
- !memcmp(elems.lnk_id->init_sta, sdata->vif.addr, ETH_ALEN);
+ !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN);
if (local_initiator == sta->sta.tdls_initiator) {
tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n");
ret = -EINVAL;
@@ -1889,16 +1900,16 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
}
/* peer should have known better */
- if (!sta->sta.ht_cap.ht_supported && elems.sec_chan_offs &&
- elems.sec_chan_offs->sec_chan_offs) {
+ if (!sta->sta.ht_cap.ht_supported && elems->sec_chan_offs &&
+ elems->sec_chan_offs->sec_chan_offs) {
tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n");
ret = -ENOTSUPP;
goto out;
}
params.chandef = &chandef;
- params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time);
- params.switch_timeout = le16_to_cpu(elems.ch_sw_timing->switch_timeout);
+ params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time);
+ params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout);
params.tmpl_skb =
ieee80211_tdls_ch_sw_resp_tmpl_get(sta,
@@ -1917,6 +1928,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
out:
mutex_unlock(&local->sta_mtx);
dev_kfree_skb_any(params.tmpl_skb);
+free:
+ kfree(elems);
return ret;
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2d1193ed3eb5..a756a197c770 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -146,7 +146,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
rate = DIV_ROUND_UP(r->bitrate, 1 << shift);
switch (sband->band) {
- case NL80211_BAND_2GHZ: {
+ case NL80211_BAND_2GHZ:
+ case NL80211_BAND_LC: {
u32 flag;
if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
flag = IEEE80211_RATE_MANDATORY_G;
@@ -2209,7 +2210,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
}
vht_mcs = iterator.this_arg[4] >> 4;
+ if (vht_mcs > 11)
+ vht_mcs = 0;
vht_nss = iterator.this_arg[4] & 0xF;
+ if (!vht_nss || vht_nss > 8)
+ vht_nss = 1;
break;
/*
@@ -3380,6 +3385,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
goto out;
+ /* If n == 2, the "while (*frag_tail)" loop above didn't execute
+ * and frag_tail should be &skb_shinfo(head)->frag_list.
+ * However, ieee80211_amsdu_prepare_head() can reallocate it.
+ * Reload frag_tail to have it pointing to the correct place.
+ */
+ if (n == 2)
+ frag_tail = &skb_shinfo(head)->frag_list;
+
/*
* Pad out the previous subframe to a multiple of 4 by adding the
* padding to the next one, that's being added. Note that head->len
@@ -4979,6 +4992,115 @@ static int ieee80211_beacon_protect(struct sk_buff *skb,
return 0;
}
+static void
+ieee80211_beacon_get_finish(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_mutable_offsets *offs,
+ struct beacon_data *beacon,
+ struct sk_buff *skb,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ u16 csa_off_base)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_tx_info *info;
+ enum nl80211_band band;
+ struct ieee80211_tx_rate_control txrc;
+
+ /* CSA offsets */
+ if (offs && beacon) {
+ u16 i;
+
+ for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; i++) {
+ u16 csa_off = beacon->cntdwn_counter_offsets[i];
+
+ if (!csa_off)
+ continue;
+
+ offs->cntdwn_counter_offs[i] = csa_off_base + csa_off;
+ }
+ }
+
+ band = chanctx_conf->def.chan->band;
+ info = IEEE80211_SKB_CB(skb);
+ info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+ info->flags |= IEEE80211_TX_CTL_NO_ACK;
+ info->band = band;
+
+ memset(&txrc, 0, sizeof(txrc));
+ txrc.hw = hw;
+ txrc.sband = local->hw.wiphy->bands[band];
+ txrc.bss_conf = &sdata->vif.bss_conf;
+ txrc.skb = skb;
+ txrc.reported_rate.idx = -1;
+ if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band])
+ txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band];
+ else
+ txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
+ txrc.bss = true;
+ rate_control_get_rate(sdata, NULL, &txrc);
+
+ info->control.vif = vif;
+ info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT |
+ IEEE80211_TX_CTL_ASSIGN_SEQ |
+ IEEE80211_TX_CTL_FIRST_FRAGMENT;
+}
+
+static struct sk_buff *
+ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_mutable_offsets *offs,
+ bool is_template,
+ struct beacon_data *beacon,
+ struct ieee80211_chanctx_conf *chanctx_conf)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_if_ap *ap = &sdata->u.ap;
+ struct sk_buff *skb = NULL;
+ u16 csa_off_base = 0;
+
+ if (beacon->cntdwn_counter_offsets[0]) {
+ if (!is_template)
+ ieee80211_beacon_update_cntdwn(vif);
+
+ ieee80211_set_beacon_cntdwn(sdata, beacon);
+ }
+
+ /* headroom, head length,
+ * tail length and maximum TIM length
+ */
+ skb = dev_alloc_skb(local->tx_headroom + beacon->head_len +
+ beacon->tail_len + 256 +
+ local->hw.extra_beacon_tailroom);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, local->tx_headroom);
+ skb_put_data(skb, beacon->head, beacon->head_len);
+
+ ieee80211_beacon_add_tim(sdata, &ap->ps, skb, is_template);
+
+ if (offs) {
+ offs->tim_offset = beacon->head_len;
+ offs->tim_length = skb->len - beacon->head_len;
+ offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0];
+
+ /* for AP the csa offsets are from tail */
+ csa_off_base = skb->len;
+ }
+
+ if (beacon->tail)
+ skb_put_data(skb, beacon->tail, beacon->tail_len);
+
+ if (ieee80211_beacon_protect(skb, local, sdata) < 0)
+ return NULL;
+
+ ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb, chanctx_conf,
+ csa_off_base);
+ return skb;
+}
+
static struct sk_buff *
__ieee80211_beacon_get(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -4988,12 +5110,8 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
struct ieee80211_local *local = hw_to_local(hw);
struct beacon_data *beacon = NULL;
struct sk_buff *skb = NULL;
- struct ieee80211_tx_info *info;
struct ieee80211_sub_if_data *sdata = NULL;
- enum nl80211_band band;
- struct ieee80211_tx_rate_control txrc;
struct ieee80211_chanctx_conf *chanctx_conf;
- int csa_off_base = 0;
rcu_read_lock();
@@ -5010,48 +5128,11 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
struct ieee80211_if_ap *ap = &sdata->u.ap;
beacon = rcu_dereference(ap->beacon);
- if (beacon) {
- if (beacon->cntdwn_counter_offsets[0]) {
- if (!is_template)
- ieee80211_beacon_update_cntdwn(vif);
-
- ieee80211_set_beacon_cntdwn(sdata, beacon);
- }
-
- /*
- * headroom, head length,
- * tail length and maximum TIM length
- */
- skb = dev_alloc_skb(local->tx_headroom +
- beacon->head_len +
- beacon->tail_len + 256 +
- local->hw.extra_beacon_tailroom);
- if (!skb)
- goto out;
-
- skb_reserve(skb, local->tx_headroom);
- skb_put_data(skb, beacon->head, beacon->head_len);
-
- ieee80211_beacon_add_tim(sdata, &ap->ps, skb,
- is_template);
-
- if (offs) {
- offs->tim_offset = beacon->head_len;
- offs->tim_length = skb->len - beacon->head_len;
- offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0];
-
- /* for AP the csa offsets are from tail */
- csa_off_base = skb->len;
- }
-
- if (beacon->tail)
- skb_put_data(skb, beacon->tail,
- beacon->tail_len);
-
- if (ieee80211_beacon_protect(skb, local, sdata) < 0)
- goto out;
- } else
+ if (!beacon)
goto out;
+
+ skb = ieee80211_beacon_get_ap(hw, vif, offs, is_template,
+ beacon, chanctx_conf);
} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_hdr *hdr;
@@ -5077,6 +5158,9 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
hdr = (struct ieee80211_hdr *) skb->data;
hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_BEACON);
+
+ ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb,
+ chanctx_conf, 0);
} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
@@ -5116,51 +5200,13 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
}
skb_put_data(skb, beacon->tail, beacon->tail_len);
+ ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb,
+ chanctx_conf, 0);
} else {
WARN_ON(1);
goto out;
}
- /* CSA offsets */
- if (offs && beacon) {
- int i;
-
- for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; i++) {
- u16 csa_off = beacon->cntdwn_counter_offsets[i];
-
- if (!csa_off)
- continue;
-
- offs->cntdwn_counter_offs[i] = csa_off_base + csa_off;
- }
- }
-
- band = chanctx_conf->def.chan->band;
-
- info = IEEE80211_SKB_CB(skb);
-
- info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
- info->flags |= IEEE80211_TX_CTL_NO_ACK;
- info->band = band;
-
- memset(&txrc, 0, sizeof(txrc));
- txrc.hw = hw;
- txrc.sband = local->hw.wiphy->bands[band];
- txrc.bss_conf = &sdata->vif.bss_conf;
- txrc.skb = skb;
- txrc.reported_rate.idx = -1;
- if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band])
- txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band];
- else
- txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
- txrc.bss = true;
- rate_control_get_rate(sdata, NULL, &txrc);
-
- info->control.vif = vif;
-
- info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT |
- IEEE80211_TX_CTL_ASSIGN_SEQ |
- IEEE80211_TX_CTL_FIRST_FRAGMENT;
out:
rcu_read_unlock();
return skb;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 49cb96d25169..39fa2a50385d 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1112,10 +1112,6 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
} else
elem_parse_failed = true;
break;
- case WLAN_EID_CHALLENGE:
- elems->challenge = pos;
- elems->challenge_len = elen;
- break;
case WLAN_EID_VENDOR_SPECIFIC:
if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
pos[2] == 0xf2) {
@@ -1395,8 +1391,8 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
struct ieee802_11_elems *elems,
- u8 *transmitter_bssid,
- u8 *bss_bssid,
+ const u8 *transmitter_bssid,
+ const u8 *bss_bssid,
u8 *nontransmitted_profile)
{
const struct element *elem, *sub;
@@ -1461,16 +1457,20 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
return found ? profile_len : 0;
}
-u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
- struct ieee802_11_elems *elems,
- u64 filter, u32 crc, u8 *transmitter_bssid,
- u8 *bss_bssid)
+struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len,
+ bool action, u64 filter,
+ u32 crc,
+ const u8 *transmitter_bssid,
+ const u8 *bss_bssid)
{
+ struct ieee802_11_elems *elems;
const struct element *non_inherit = NULL;
u8 *nontransmitted_profile;
int nontransmitted_profile_len = 0;
- memset(elems, 0, sizeof(*elems));
+ elems = kzalloc(sizeof(*elems), GFP_ATOMIC);
+ if (!elems)
+ return NULL;
elems->ie_start = start;
elems->total_len = len;
@@ -1516,7 +1516,9 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
kfree(nontransmitted_profile);
- return crc;
+ elems->crc = crc;
+
+ return elems;
}
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
@@ -3383,6 +3385,7 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_sta_he_cap *he_cap;
struct cfg80211_chan_def he_chandef = *chandef;
const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+ struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
bool support_80_80, support_160;
u8 he_phy_cap;
u32 freq;
@@ -3427,6 +3430,19 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq);
switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ bss_conf->power_type = IEEE80211_REG_LPI_AP;
+ break;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ bss_conf->power_type = IEEE80211_REG_SP_AP;
+ break;
+ default:
+ bss_conf->power_type = IEEE80211_REG_UNSET_AP;
+ break;
+ }
+
+ switch (u8_get_bits(he_6ghz_oper->control,
IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) {
case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ:
he_chandef.width = NL80211_CHAN_WIDTH_20;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index bca47fad5a16..4eed23e27610 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -520,6 +520,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
@@ -749,6 +752,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 323d3d2d986f..500ed1b81250 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -129,15 +129,14 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
if (!ieee802154_is_valid_extended_unicast_addr(extended_addr))
return -EINVAL;
- memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ dev_addr_set(dev, addr->sa_data);
sdata->wpan_dev.extended_addr = extended_addr;
/* update lowpan interface mac address when
* wpan mac has been changed
*/
if (sdata->wpan_dev.lowpan_dev)
- memcpy(sdata->wpan_dev.lowpan_dev->dev_addr, dev->dev_addr,
- dev->addr_len);
+ dev_addr_set(sdata->wpan_dev.lowpan_dev, dev->dev_addr);
return mac802154_wpan_update_llsec(dev);
}
@@ -615,6 +614,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
unsigned char name_assign_type, enum nl802154_iftype type,
__le64 extended_addr)
{
+ u8 addr[IEEE802154_EXTENDED_ADDR_LEN];
struct net_device *ndev = NULL;
struct ieee802154_sub_if_data *sdata = NULL;
int ret;
@@ -638,11 +638,12 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
switch (type) {
case NL802154_IFTYPE_NODE:
ndev->type = ARPHRD_IEEE802154;
- if (ieee802154_is_valid_extended_unicast_addr(extended_addr))
- ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr);
- else
- memcpy(ndev->dev_addr, ndev->perm_addr,
- IEEE802154_EXTENDED_ADDR_LEN);
+ if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) {
+ ieee802154_le64_to_be64(addr, &extended_addr);
+ dev_addr_set(ndev, addr);
+ } else {
+ dev_addr_set(ndev, ndev->perm_addr);
+ }
break;
case NL802154_IFTYPE_MONITOR:
ndev->type = ARPHRD_IEEE802154_MONITOR;
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
index 2cdf3d0a28c9..3a5c0e70da77 100644
--- a/net/mctp/Kconfig
+++ b/net/mctp/Kconfig
@@ -1,7 +1,7 @@
menuconfig MCTP
depends on NET
- tristate "MCTP core protocol support"
+ bool "MCTP core protocol support"
help
Management Component Transport Protocol (MCTP) is an in-system
protocol for communicating between management controllers and
@@ -11,3 +11,13 @@ menuconfig MCTP
This option enables core MCTP support. For communicating with other
devices, you'll want to enable a driver for a specific hardware
channel.
+
+config MCTP_TEST
+ bool "MCTP core tests" if !KUNIT_ALL_TESTS
+ depends on MCTP=y && KUNIT=y
+ default KUNIT_ALL_TESTS
+
+config MCTP_FLOWS
+ bool
+ depends on MCTP
+ select SKB_EXTENSIONS
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
index 0171333384d7..6cd55233e685 100644
--- a/net/mctp/Makefile
+++ b/net/mctp/Makefile
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MCTP) += mctp.o
mctp-objs := af_mctp.o device.o route.o neigh.o
+
+# tests
+obj-$(CONFIG_MCTP_TEST) += test/utils.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index a9526ac29dff..871cf6266125 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -16,6 +16,9 @@
#include <net/mctpdevice.h>
#include <net/sock.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mctp.h>
+
/* socket implementation */
static int mctp_release(struct socket *sock)
@@ -30,6 +33,19 @@ static int mctp_release(struct socket *sock)
return 0;
}
+/* Generic sockaddr checks, padding checks only so far */
+static bool mctp_sockaddr_is_ok(const struct sockaddr_mctp *addr)
+{
+ return !addr->__smctp_pad0 && !addr->__smctp_pad1;
+}
+
+static bool mctp_sockaddr_ext_is_ok(const struct sockaddr_mctp_ext *addr)
+{
+ return !addr->__smctp_pad0[0] &&
+ !addr->__smctp_pad0[1] &&
+ !addr->__smctp_pad0[2];
+}
+
static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
struct sock *sk = sock->sk;
@@ -49,6 +65,9 @@ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
/* it's a valid sockaddr for MCTP, cast and do protocol checks */
smctp = (struct sockaddr_mctp *)addr;
+ if (!mctp_sockaddr_is_ok(smctp))
+ return -EINVAL;
+
lock_sock(sk);
/* TODO: allow rebind */
@@ -74,6 +93,7 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
int rc, addrlen = msg->msg_namelen;
struct sock *sk = sock->sk;
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb;
struct mctp_route *rt;
struct sk_buff *skb;
@@ -83,6 +103,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
return -EINVAL;
if (addr->smctp_family != AF_MCTP)
return -EINVAL;
+ if (!mctp_sockaddr_is_ok(addr))
+ return -EINVAL;
if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
return -EINVAL;
@@ -97,11 +119,6 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (addr->smctp_network == MCTP_NET_ANY)
addr->smctp_network = mctp_default_net(sock_net(sk));
- rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
- addr->smctp_addr.s_addr);
- if (!rt)
- return -EHOSTUNREACH;
-
skb = sock_alloc_send_skb(sk, hlen + 1 + len,
msg->msg_flags & MSG_DONTWAIT, &rc);
if (!skb)
@@ -113,19 +130,46 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
*(u8 *)skb_put(skb, 1) = addr->smctp_type;
rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
- if (rc < 0) {
- kfree_skb(skb);
- return rc;
- }
+ if (rc < 0)
+ goto err_free;
/* set up cb */
cb = __mctp_cb(skb);
cb->net = addr->smctp_network;
+ /* direct addressing */
+ if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) {
+ DECLARE_SOCKADDR(struct sockaddr_mctp_ext *,
+ extaddr, msg->msg_name);
+
+ if (!mctp_sockaddr_ext_is_ok(extaddr) ||
+ extaddr->smctp_halen > sizeof(cb->haddr)) {
+ rc = -EINVAL;
+ goto err_free;
+ }
+
+ cb->ifindex = extaddr->smctp_ifindex;
+ cb->halen = extaddr->smctp_halen;
+ memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen);
+
+ rt = NULL;
+ } else {
+ rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+ addr->smctp_addr.s_addr);
+ if (!rt) {
+ rc = -EHOSTUNREACH;
+ goto err_free;
+ }
+ }
+
rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
addr->smctp_tag);
return rc ? : len;
+
+err_free:
+ kfree_skb(skb);
+ return rc;
}
static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
@@ -133,6 +177,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
{
DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
struct sock *sk = sock->sk;
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct sk_buff *skb;
size_t msglen;
u8 type;
@@ -172,12 +217,25 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
addr = msg->msg_name;
addr->smctp_family = AF_MCTP;
+ addr->__smctp_pad0 = 0;
addr->smctp_network = cb->net;
addr->smctp_addr.s_addr = hdr->src;
addr->smctp_type = type;
addr->smctp_tag = hdr->flags_seq_tag &
(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+ addr->__smctp_pad1 = 0;
msg->msg_namelen = sizeof(*addr);
+
+ if (msk->addr_ext) {
+ DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, ae,
+ msg->msg_name);
+ msg->msg_namelen = sizeof(*ae);
+ ae->smctp_ifindex = cb->ifindex;
+ ae->smctp_halen = cb->halen;
+ memset(ae->__smctp_pad0, 0x0, sizeof(ae->__smctp_pad0));
+ memset(ae->smctp_haddr, 0x0, sizeof(ae->smctp_haddr));
+ memcpy(ae->smctp_haddr, cb->haddr, cb->halen);
+ }
}
rc = len;
@@ -193,12 +251,45 @@ out_free:
static int mctp_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
- return -EINVAL;
+ struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+ int val;
+
+ if (level != SOL_MCTP)
+ return -EINVAL;
+
+ if (optname == MCTP_OPT_ADDR_EXT) {
+ if (optlen != sizeof(int))
+ return -EINVAL;
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
+ return -EFAULT;
+ msk->addr_ext = val;
+ return 0;
+ }
+
+ return -ENOPROTOOPT;
}
static int mctp_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
+ struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+ int len, val;
+
+ if (level != SOL_MCTP)
+ return -EINVAL;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ if (optname == MCTP_OPT_ADDR_EXT) {
+ if (len != sizeof(int))
+ return -EINVAL;
+ val = !!msk->addr_ext;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+ return 0;
+ }
+
return -EINVAL;
}
@@ -223,16 +314,61 @@ static const struct proto_ops mctp_dgram_ops = {
.sendpage = sock_no_sendpage,
};
+static void mctp_sk_expire_keys(struct timer_list *timer)
+{
+ struct mctp_sock *msk = container_of(timer, struct mctp_sock,
+ key_expiry);
+ struct net *net = sock_net(&msk->sk);
+ unsigned long next_expiry, flags;
+ struct mctp_sk_key *key;
+ struct hlist_node *tmp;
+ bool next_expiry_valid = false;
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+
+ hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
+ spin_lock(&key->lock);
+
+ if (!time_after_eq(key->expiry, jiffies)) {
+ trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT);
+ key->valid = false;
+ hlist_del_rcu(&key->hlist);
+ hlist_del_rcu(&key->sklist);
+ spin_unlock(&key->lock);
+ mctp_key_unref(key);
+ continue;
+ }
+
+ if (next_expiry_valid) {
+ if (time_before(key->expiry, next_expiry))
+ next_expiry = key->expiry;
+ } else {
+ next_expiry = key->expiry;
+ next_expiry_valid = true;
+ }
+ spin_unlock(&key->lock);
+ }
+
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ if (next_expiry_valid)
+ mod_timer(timer, next_expiry);
+}
+
static int mctp_sk_init(struct sock *sk)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
INIT_HLIST_HEAD(&msk->keys);
+ timer_setup(&msk->key_expiry, mctp_sk_expire_keys, 0);
return 0;
}
static void mctp_sk_close(struct sock *sk, long timeout)
{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+ del_timer_sync(&msk->key_expiry);
sk_common_release(sk);
}
@@ -263,21 +399,23 @@ static void mctp_sk_unhash(struct sock *sk)
/* remove tag allocations */
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
- hlist_del_rcu(&key->sklist);
- hlist_del_rcu(&key->hlist);
+ hlist_del(&key->sklist);
+ hlist_del(&key->hlist);
+
+ trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED);
- spin_lock(&key->reasm_lock);
+ spin_lock(&key->lock);
if (key->reasm_head)
kfree_skb(key->reasm_head);
key->reasm_head = NULL;
key->reasm_dead = true;
- spin_unlock(&key->reasm_lock);
+ key->valid = false;
+ spin_unlock(&key->lock);
- kfree_rcu(key, rcu);
+ /* key is no longer on the lookup lists, unref */
+ mctp_key_unref(key);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
-
- synchronize_rcu();
}
static struct proto mctp_proto = {
@@ -385,7 +523,7 @@ static __exit void mctp_exit(void)
sock_unregister(PF_MCTP);
}
-module_init(mctp_init);
+subsys_initcall(mctp_init);
module_exit(mctp_exit);
MODULE_DESCRIPTION("MCTP core");
diff --git a/net/mctp/device.c b/net/mctp/device.c
index b9f38e765f61..8799ee77e7b7 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -35,14 +35,6 @@ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
return rtnl_dereference(dev->mctp_ptr);
}
-static void mctp_dev_destroy(struct mctp_dev *mdev)
-{
- struct net_device *dev = mdev->dev;
-
- dev_put(dev);
- kfree_rcu(mdev, rcu);
-}
-
static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb,
struct mctp_dev *mdev, mctp_eid_t eid)
{
@@ -255,6 +247,37 @@ static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+void mctp_dev_hold(struct mctp_dev *mdev)
+{
+ refcount_inc(&mdev->refs);
+}
+
+void mctp_dev_put(struct mctp_dev *mdev)
+{
+ if (refcount_dec_and_test(&mdev->refs)) {
+ dev_put(mdev->dev);
+ kfree_rcu(mdev, rcu);
+ }
+}
+
+void mctp_dev_release_key(struct mctp_dev *dev, struct mctp_sk_key *key)
+ __must_hold(&key->lock)
+{
+ if (!dev)
+ return;
+ if (dev->ops && dev->ops->release_flow)
+ dev->ops->release_flow(dev, key);
+ key->dev = NULL;
+ mctp_dev_put(dev);
+}
+
+void mctp_dev_set_key(struct mctp_dev *dev, struct mctp_sk_key *key)
+ __must_hold(&key->lock)
+{
+ mctp_dev_hold(dev);
+ key->dev = dev;
+}
+
static struct mctp_dev *mctp_add_dev(struct net_device *dev)
{
struct mctp_dev *mdev;
@@ -270,7 +293,9 @@ static struct mctp_dev *mctp_add_dev(struct net_device *dev)
mdev->net = mctp_default_net(dev_net(dev));
/* associate to net_device */
+ refcount_set(&mdev->refs, 1);
rcu_assign_pointer(dev->mctp_ptr, mdev);
+
dev_hold(dev);
mdev->dev = dev;
@@ -330,12 +355,26 @@ static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr,
return 0;
}
+/* Matches netdev types that should have MCTP handling */
+static bool mctp_known(struct net_device *dev)
+{
+ /* only register specific types (inc. NONE for TUN devices) */
+ return dev->type == ARPHRD_MCTP ||
+ dev->type == ARPHRD_LOOPBACK ||
+ dev->type == ARPHRD_NONE;
+}
+
static void mctp_unregister(struct net_device *dev)
{
struct mctp_dev *mdev;
mdev = mctp_dev_get_rtnl(dev);
-
+ if (mctp_known(dev) != (bool)mdev) {
+ // Sanity check, should match what was set in mctp_register
+ netdev_warn(dev, "%s: mdev pointer %d but type (%d) match is %d",
+ __func__, (bool)mdev, mctp_known(dev), dev->type);
+ return;
+ }
if (!mdev)
return;
@@ -345,7 +384,7 @@ static void mctp_unregister(struct net_device *dev)
mctp_neigh_remove_dev(mdev);
kfree(mdev->addrs);
- mctp_dev_destroy(mdev);
+ mctp_dev_put(mdev);
}
static int mctp_register(struct net_device *dev)
@@ -353,11 +392,17 @@ static int mctp_register(struct net_device *dev)
struct mctp_dev *mdev;
/* Already registered? */
- if (rtnl_dereference(dev->mctp_ptr))
+ mdev = rtnl_dereference(dev->mctp_ptr);
+
+ if (mdev) {
+ if (!mctp_known(dev))
+ netdev_warn(dev, "%s: mctp_dev set for unknown type %d",
+ __func__, dev->type);
return 0;
+ }
- /* only register specific types; MCTP-specific and loopback for now */
- if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK)
+ /* only register specific types */
+ if (!mctp_known(dev))
return 0;
mdev = mctp_add_dev(dev);
@@ -387,6 +432,39 @@ static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
return NOTIFY_OK;
}
+static int mctp_register_netdevice(struct net_device *dev,
+ const struct mctp_netdev_ops *ops)
+{
+ struct mctp_dev *mdev;
+
+ mdev = mctp_add_dev(dev);
+ if (IS_ERR(mdev))
+ return PTR_ERR(mdev);
+
+ mdev->ops = ops;
+
+ return register_netdevice(dev);
+}
+
+int mctp_register_netdev(struct net_device *dev,
+ const struct mctp_netdev_ops *ops)
+{
+ int rc;
+
+ rtnl_lock();
+ rc = mctp_register_netdevice(dev, ops);
+ rtnl_unlock();
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(mctp_register_netdev);
+
+void mctp_unregister_netdev(struct net_device *dev)
+{
+ unregister_netdev(dev);
+}
+EXPORT_SYMBOL_GPL(mctp_unregister_netdev);
+
static struct rtnl_af_ops mctp_af_ops = {
.family = AF_MCTP,
.fill_link_af = mctp_fill_link_af,
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
index 90ed2f02d1fb..5cc042121493 100644
--- a/net/mctp/neigh.c
+++ b/net/mctp/neigh.c
@@ -47,7 +47,7 @@ static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid,
}
INIT_LIST_HEAD(&neigh->list);
neigh->dev = mdev;
- dev_hold(neigh->dev->dev);
+ mctp_dev_hold(neigh->dev);
neigh->eid = eid;
neigh->source = source;
memcpy(neigh->ha, lladdr, lladdr_len);
@@ -63,7 +63,7 @@ static void __mctp_neigh_free(struct rcu_head *rcu)
{
struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu);
- dev_put(neigh->dev->dev);
+ mctp_dev_put(neigh->dev);
kfree(neigh);
}
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 5265525011ad..46c44823edb7 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -11,6 +11,7 @@
*/
#include <linux/idr.h>
+#include <linux/kconfig.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
@@ -23,7 +24,12 @@
#include <net/netlink.h>
#include <net/sock.h>
+#include <trace/events/mctp.h>
+
static const unsigned int mctp_message_maxlen = 64 * 1024;
+static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;
+
+static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);
/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
@@ -83,25 +89,43 @@ static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
return true;
}
+/* returns a key (with key->lock held, and refcounted), or NULL if no such
+ * key exists.
+ */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
- mctp_eid_t peer)
+ mctp_eid_t peer,
+ unsigned long *irqflags)
+ __acquires(&key->lock)
{
struct mctp_sk_key *key, *ret;
+ unsigned long flags;
struct mctp_hdr *mh;
u8 tag;
- WARN_ON(!rcu_read_lock_held());
-
mh = mctp_hdr(skb);
tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
ret = NULL;
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
- hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
- if (mctp_key_match(key, mh->dest, peer, tag)) {
+ hlist_for_each_entry(key, &net->mctp.keys, hlist) {
+ if (!mctp_key_match(key, mh->dest, peer, tag))
+ continue;
+
+ spin_lock(&key->lock);
+ if (key->valid) {
+ refcount_inc(&key->refs);
ret = key;
break;
}
+ spin_unlock(&key->lock);
+ }
+
+ if (ret) {
+ spin_unlock(&net->mctp.keys_lock);
+ *irqflags = flags;
+ } else {
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
}
return ret;
@@ -121,11 +145,30 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
key->local_addr = local;
key->tag = tag;
key->sk = &msk->sk;
- spin_lock_init(&key->reasm_lock);
+ key->valid = true;
+ spin_lock_init(&key->lock);
+ refcount_set(&key->refs, 1);
return key;
}
+void mctp_key_unref(struct mctp_sk_key *key)
+{
+ unsigned long flags;
+
+ if (!refcount_dec_and_test(&key->refs))
+ return;
+
+ /* even though no refs exist here, the lock allows us to stay
+ * consistent with the locking requirement of mctp_dev_release_key
+ */
+ spin_lock_irqsave(&key->lock, flags);
+ mctp_dev_release_key(key->dev, key);
+ spin_unlock_irqrestore(&key->lock, flags);
+
+ kfree(key);
+}
+
static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
struct net *net = sock_net(&msk->sk);
@@ -138,12 +181,20 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
key->tag)) {
- rc = -EEXIST;
- break;
+ spin_lock(&tmp->lock);
+ if (tmp->valid)
+ rc = -EEXIST;
+ spin_unlock(&tmp->lock);
+ if (rc)
+ break;
}
}
if (!rc) {
+ refcount_inc(&key->refs);
+ key->expiry = jiffies + mctp_key_lifetime;
+ timer_reduce(&msk->key_expiry, key->expiry);
+
hlist_add_head(&key->hlist, &net->mctp.keys);
hlist_add_head(&key->sklist, &msk->keys);
}
@@ -153,30 +204,72 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
return rc;
}
-/* Must be called with key->reasm_lock, which it will release. Will schedule
- * the key for an RCU free.
+/* We're done with the key; unset valid and remove from lists. There may still
+ * be outstanding refs on the key though...
*/
static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
unsigned long flags)
- __releases(&key->reasm_lock)
+ __releases(&key->lock)
{
struct sk_buff *skb;
skb = key->reasm_head;
key->reasm_head = NULL;
key->reasm_dead = true;
- spin_unlock_irqrestore(&key->reasm_lock, flags);
+ key->valid = false;
+ mctp_dev_release_key(key->dev, key);
+ spin_unlock_irqrestore(&key->lock, flags);
spin_lock_irqsave(&net->mctp.keys_lock, flags);
- hlist_del_rcu(&key->hlist);
- hlist_del_rcu(&key->sklist);
+ hlist_del(&key->hlist);
+ hlist_del(&key->sklist);
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
- kfree_rcu(key, rcu);
+
+ /* one unref for the lists */
+ mctp_key_unref(key);
+
+ /* and one for the local reference */
+ mctp_key_unref(key);
if (skb)
kfree_skb(skb);
+
+}
+
+#ifdef CONFIG_MCTP_FLOWS
+static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
+{
+ struct mctp_flow *flow;
+
+ flow = skb_ext_add(skb, SKB_EXT_MCTP);
+ if (!flow)
+ return;
+
+ refcount_inc(&key->refs);
+ flow->key = key;
}
+static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
+{
+ struct mctp_sk_key *key;
+ struct mctp_flow *flow;
+
+ flow = skb_ext_find(skb, SKB_EXT_MCTP);
+ if (!flow)
+ return;
+
+ key = flow->key;
+
+ if (WARN_ON(key->dev && key->dev != dev))
+ return;
+
+ mctp_dev_set_key(dev, key);
+}
+#else
+static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
+static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
+#endif
+
static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
struct mctp_hdr *hdr = mctp_hdr(skb);
@@ -248,8 +341,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
rcu_read_lock();
- /* lookup socket / reasm context, exactly matching (src,dest,tag) */
- key = mctp_lookup_key(net, skb, mh->src);
+ /* lookup socket / reasm context, exactly matching (src,dest,tag).
+ * we hold a ref on the key, and key->lock held.
+ */
+ key = mctp_lookup_key(net, skb, mh->src, &f);
if (flags & MCTP_HDR_FLAG_SOM) {
if (key) {
@@ -260,10 +355,12 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* key for reassembly - we'll create a more specific
* one for future packets if required (ie, !EOM).
*/
- key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
+ key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
if (key) {
msk = container_of(key->sk,
struct mctp_sock, sk);
+ spin_unlock_irqrestore(&key->lock, f);
+ mctp_key_unref(key);
key = NULL;
}
}
@@ -282,11 +379,13 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
if (flags & MCTP_HDR_FLAG_EOM) {
sock_queue_rcv_skb(&msk->sk, skb);
if (key) {
- spin_lock_irqsave(&key->reasm_lock, f);
/* we've hit a pending reassembly; not much we
* can do but drop it
*/
+ trace_mctp_key_release(key,
+ MCTP_TRACE_KEY_REPLIED);
__mctp_key_unlock_drop(key, net, f);
+ key = NULL;
}
rc = 0;
goto out_unlock;
@@ -303,7 +402,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
goto out_unlock;
}
- /* we can queue without the reasm lock here, as the
+ /* we can queue without the key lock here, as the
* key isn't observable yet
*/
mctp_frag_queue(key, skb);
@@ -318,17 +417,22 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
if (rc)
kfree(key);
- } else {
- /* existing key: start reassembly */
- spin_lock_irqsave(&key->reasm_lock, f);
+ trace_mctp_key_acquire(key);
+
+ /* we don't need to release key->lock on exit */
+ mctp_key_unref(key);
+ key = NULL;
+ } else {
if (key->reasm_head || key->reasm_dead) {
/* duplicate start? drop everything */
+ trace_mctp_key_release(key,
+ MCTP_TRACE_KEY_INVALIDATED);
__mctp_key_unlock_drop(key, net, f);
rc = -EEXIST;
+ key = NULL;
} else {
rc = mctp_frag_queue(key, skb);
- spin_unlock_irqrestore(&key->reasm_lock, f);
}
}
@@ -337,8 +441,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* using the message-specific key
*/
- spin_lock_irqsave(&key->reasm_lock, f);
-
/* we need to be continuing an existing reassembly... */
if (!key->reasm_head)
rc = -EINVAL;
@@ -351,9 +453,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
if (!rc && flags & MCTP_HDR_FLAG_EOM) {
sock_queue_rcv_skb(key->sk, key->reasm_head);
key->reasm_head = NULL;
+ trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED);
__mctp_key_unlock_drop(key, net, f);
- } else {
- spin_unlock_irqrestore(&key->reasm_lock, f);
+ key = NULL;
}
} else {
@@ -363,6 +465,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
out_unlock:
rcu_read_unlock();
+ if (key) {
+ spin_unlock_irqrestore(&key->lock, f);
+ mctp_key_unref(key);
+ }
out:
if (rc)
kfree_skb(skb);
@@ -376,6 +482,7 @@ static unsigned int mctp_route_mtu(struct mctp_route *rt)
static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
+ struct mctp_skb_cb *cb = mctp_cb(skb);
struct mctp_hdr *hdr = mctp_hdr(skb);
char daddr_buf[MAX_ADDR_LEN];
char *daddr = NULL;
@@ -390,9 +497,14 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
return -EMSGSIZE;
}
- /* If lookup fails let the device handle daddr==NULL */
- if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
- daddr = daddr_buf;
+ if (cb->ifindex) {
+ /* direct route; use the hwaddr we stashed in sendmsg */
+ daddr = cb->haddr;
+ } else {
+ /* If lookup fails let the device handle daddr==NULL */
+ if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+ daddr = daddr_buf;
+ }
rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
daddr, skb->dev->dev_addr, skb->len);
@@ -401,6 +513,8 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
return -EHOSTUNREACH;
}
+ mctp_flow_prepare_output(skb, route->dev);
+
rc = dev_queue_xmit(skb);
if (rc)
rc = net_xmit_errno(rc);
@@ -412,7 +526,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
static void mctp_route_release(struct mctp_route *rt)
{
if (refcount_dec_and_test(&rt->refs)) {
- dev_put(rt->dev->dev);
+ mctp_dev_put(rt->dev);
kfree_rcu(rt, rcu);
}
}
@@ -454,30 +568,38 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
lockdep_assert_held(&mns->keys_lock);
+ key->expiry = jiffies + mctp_key_lifetime;
+ timer_reduce(&msk->key_expiry, key->expiry);
+
/* we hold the net->key_lock here, allowing updates to both
* then net and sk
*/
hlist_add_head_rcu(&key->hlist, &mns->keys);
hlist_add_head_rcu(&key->sklist, &msk->keys);
+ refcount_inc(&key->refs);
}
/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
* it for the socket msk
*/
-static int mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
+static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t saddr,
+ mctp_eid_t daddr, u8 *tagp)
{
struct net *net = sock_net(&msk->sk);
struct netns_mctp *mns = &net->mctp;
struct mctp_sk_key *key, *tmp;
unsigned long flags;
- int rc = -EAGAIN;
u8 tagbits;
+ /* for NULL destination EIDs, we may get a response from any peer */
+ if (daddr == MCTP_ADDR_NULL)
+ daddr = MCTP_ADDR_ANY;
+
/* be optimistic, alloc now */
key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
if (!key)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* 8 possible tag values */
tagbits = 0xff;
@@ -488,14 +610,26 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk,
* tags. If we find a conflict, clear that bit from tagbits
*/
hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ /* We can check the lookup fields (*_addr, tag) without the
+ * lock held, they don't change over the lifetime of the key.
+ */
+
/* if we don't own the tag, it can't conflict */
if (tmp->tag & MCTP_HDR_FLAG_TO)
continue;
- if ((tmp->peer_addr == daddr ||
- tmp->peer_addr == MCTP_ADDR_ANY) &&
- tmp->local_addr == saddr)
+ if (!((tmp->peer_addr == daddr ||
+ tmp->peer_addr == MCTP_ADDR_ANY) &&
+ tmp->local_addr == saddr))
+ continue;
+
+ spin_lock(&tmp->lock);
+ /* key must still be valid. If we find a match, clear the
+ * potential tag value
+ */
+ if (tmp->valid)
tagbits &= ~(1 << tmp->tag);
+ spin_unlock(&tmp->lock);
if (!tagbits)
break;
@@ -504,16 +638,19 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk,
if (tagbits) {
key->tag = __ffs(tagbits);
mctp_reserve_tag(net, key, msk);
+ trace_mctp_key_acquire(key);
+
*tagp = key->tag;
- rc = 0;
}
spin_unlock_irqrestore(&mns->keys_lock, flags);
- if (!tagbits)
+ if (!tagbits) {
kfree(key);
+ return ERR_PTR(-EBUSY);
+ }
- return rc;
+ return key;
}
/* routing lookups */
@@ -552,14 +689,18 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
return rt;
}
-/* sends a skb to rt and releases the route. */
-int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
+static struct mctp_route *mctp_route_lookup_null(struct net *net,
+ struct net_device *dev)
{
- int rc;
+ struct mctp_route *rt;
- rc = rt->output(rt, skb);
- mctp_route_release(rt);
- return rc;
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
+ if (rt->dev->dev == dev && rt->type == RTN_LOCAL &&
+ refcount_inc_not_zero(&rt->refs))
+ return rt;
+ }
+
+ return NULL;
}
static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
@@ -628,7 +769,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
/* copy message payload */
skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
- /* do route, but don't drop the rt reference */
+ /* do route */
rc = rt->output(rt, skb2);
if (rc)
break;
@@ -637,7 +778,6 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
pos += size;
}
- mctp_route_release(rt);
consume_skb(skb);
return rc;
}
@@ -647,15 +787,52 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb = mctp_cb(skb);
+ struct mctp_route tmp_rt;
+ struct mctp_sk_key *key;
+ struct net_device *dev;
struct mctp_hdr *hdr;
unsigned long flags;
unsigned int mtu;
mctp_eid_t saddr;
+ bool ext_rt;
int rc;
u8 tag;
- if (WARN_ON(!rt->dev))
+ rc = -ENODEV;
+
+ if (rt) {
+ ext_rt = false;
+ dev = NULL;
+
+ if (WARN_ON(!rt->dev))
+ goto out_release;
+
+ } else if (cb->ifindex) {
+ ext_rt = true;
+ rt = &tmp_rt;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
+ if (!dev) {
+ rcu_read_unlock();
+ return rc;
+ }
+
+ rt->dev = __mctp_dev_get(dev);
+ rcu_read_unlock();
+
+ if (!rt->dev)
+ goto out_release;
+
+ /* establish temporary route - we set up enough to keep
+ * mctp_route_output happy
+ */
+ rt->output = mctp_route_output;
+ rt->mtu = 0;
+
+ } else {
return -EINVAL;
+ }
spin_lock_irqsave(&rt->dev->addrs_lock, flags);
if (rt->dev->num_addrs == 0) {
@@ -668,18 +845,23 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
if (rc)
- return rc;
+ goto out_release;
if (req_tag & MCTP_HDR_FLAG_TO) {
- rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
- if (rc)
- return rc;
+ key = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+ if (IS_ERR(key)) {
+ rc = PTR_ERR(key);
+ goto out_release;
+ }
+ mctp_skb_set_flow(skb, key);
+ /* done with the key in this scope */
+ mctp_key_unref(key);
tag |= MCTP_HDR_FLAG_TO;
} else {
+ key = NULL;
tag = req_tag;
}
-
skb->protocol = htons(ETH_P_MCTP);
skb->priority = 0;
skb_reset_transport_header(skb);
@@ -699,12 +881,22 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
mtu = mctp_route_mtu(rt);
if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
- hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
- tag;
- return mctp_do_route(rt, skb);
+ hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
+ MCTP_HDR_FLAG_EOM | tag;
+ rc = rt->output(rt, skb);
} else {
- return mctp_do_fragment_route(rt, skb, mtu, tag);
+ rc = mctp_do_fragment_route(rt, skb, mtu, tag);
}
+
+out_release:
+ if (!ext_rt)
+ mctp_route_release(rt);
+
+ if (dev)
+ dev_put(dev);
+
+ return rc;
+
}
/* route management */
@@ -741,7 +933,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
rt->max = daddr_start + daddr_extent;
rt->mtu = mtu;
rt->dev = mdev;
- dev_hold(rt->dev->dev);
+ mctp_dev_hold(rt->dev);
rt->type = type;
rt->output = rtfn;
@@ -821,13 +1013,18 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
struct net_device *orig_dev)
{
struct net *net = dev_net(dev);
+ struct mctp_dev *mdev;
struct mctp_skb_cb *cb;
struct mctp_route *rt;
struct mctp_hdr *mh;
- /* basic non-data sanity checks */
- if (dev->type != ARPHRD_MCTP)
+ rcu_read_lock();
+ mdev = __mctp_dev_get(dev);
+ rcu_read_unlock();
+ if (!mdev) {
+ /* basic non-data sanity checks */
goto err_drop;
+ }
if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
goto err_drop;
@@ -840,16 +1037,27 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
goto err_drop;
- cb = __mctp_cb(skb);
- rcu_read_lock();
- cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
- rcu_read_unlock();
+ /* MCTP drivers must populate halen/haddr */
+ if (dev->type == ARPHRD_MCTP) {
+ cb = mctp_cb(skb);
+ } else {
+ cb = __mctp_cb(skb);
+ cb->halen = 0;
+ }
+ cb->net = READ_ONCE(mdev->net);
+ cb->ifindex = dev->ifindex;
rt = mctp_route_lookup(net, cb->net, mh->dest);
+
+ /* NULL EID, but addressed to our physical address */
+ if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
+ rt = mctp_route_lookup_null(net, dev);
+
if (!rt)
goto err_drop;
- mctp_do_route(rt, skb);
+ rt->output(rt, skb);
+ mctp_route_release(rt);
return NET_RX_SUCCESS;
@@ -926,10 +1134,15 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
+ [RTAX_MTU] = { .type = NLA_U32 },
+};
+
static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RTA_MAX + 1];
+ struct nlattr *tbx[RTAX_MAX + 1];
mctp_eid_t daddr_start;
struct mctp_dev *mdev;
struct rtmsg *rtm;
@@ -946,8 +1159,15 @@ static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
- /* TODO: parse mtu from nlparse */
mtu = 0;
+ if (tb[RTA_METRICS]) {
+ rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
+ rta_metrics_policy, NULL);
+ if (rc < 0)
+ return rc;
+ if (tbx[RTAX_MTU])
+ mtu = nla_get_u32(tbx[RTAX_MTU]);
+ }
if (rtm->rtm_type != RTN_UNICAST)
return -EINVAL;
@@ -1083,8 +1303,10 @@ static void __net_exit mctp_routes_net_exit(struct net *net)
{
struct mctp_route *rt;
+ rcu_read_lock();
list_for_each_entry_rcu(rt, &net->mctp.routes, list)
mctp_route_release(rt);
+ rcu_read_unlock();
}
static struct pernet_operations mctp_net_ops = {
@@ -1114,3 +1336,7 @@ void __exit mctp_routes_exit(void)
rtnl_unregister(PF_MCTP, RTM_GETROUTE);
dev_remove_pack(&mctp_packet_type);
}
+
+#if IS_ENABLED(CONFIG_MCTP_TEST)
+#include "test/route-test.c"
+#endif
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
new file mode 100644
index 000000000000..36fac3daf86a
--- /dev/null
+++ b/net/mctp/test/route-test.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <kunit/test.h>
+
+#include "utils.h"
+
+struct mctp_test_route {
+ struct mctp_route rt;
+ struct sk_buff_head pkts;
+};
+
+static int mctp_test_route_output(struct mctp_route *rt, struct sk_buff *skb)
+{
+ struct mctp_test_route *test_rt = container_of(rt, struct mctp_test_route, rt);
+
+ skb_queue_tail(&test_rt->pkts, skb);
+
+ return 0;
+}
+
+/* local version of mctp_route_alloc() */
+static struct mctp_test_route *mctp_route_test_alloc(void)
+{
+ struct mctp_test_route *rt;
+
+ rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+ if (!rt)
+ return NULL;
+
+ INIT_LIST_HEAD(&rt->rt.list);
+ refcount_set(&rt->rt.refs, 1);
+ rt->rt.output = mctp_test_route_output;
+
+ skb_queue_head_init(&rt->pkts);
+
+ return rt;
+}
+
+static struct mctp_test_route *mctp_test_create_route(struct net *net,
+ struct mctp_dev *dev,
+ mctp_eid_t eid,
+ unsigned int mtu)
+{
+ struct mctp_test_route *rt;
+
+ rt = mctp_route_test_alloc();
+ if (!rt)
+ return NULL;
+
+ rt->rt.min = eid;
+ rt->rt.max = eid;
+ rt->rt.mtu = mtu;
+ rt->rt.type = RTN_UNSPEC;
+ if (dev)
+ mctp_dev_hold(dev);
+ rt->rt.dev = dev;
+
+ list_add_rcu(&rt->rt.list, &net->mctp.routes);
+
+ return rt;
+}
+
+static void mctp_test_route_destroy(struct kunit *test,
+ struct mctp_test_route *rt)
+{
+ unsigned int refs;
+
+ rtnl_lock();
+ list_del_rcu(&rt->rt.list);
+ rtnl_unlock();
+
+ skb_queue_purge(&rt->pkts);
+ if (rt->rt.dev)
+ mctp_dev_put(rt->rt.dev);
+
+ refs = refcount_read(&rt->rt.refs);
+ KUNIT_ASSERT_EQ_MSG(test, refs, 1, "route ref imbalance");
+
+ kfree_rcu(&rt->rt, rcu);
+}
+
+static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr,
+ unsigned int data_len)
+{
+ size_t hdr_len = sizeof(*hdr);
+ struct sk_buff *skb;
+ unsigned int i;
+ u8 *buf;
+
+ skb = alloc_skb(hdr_len + data_len, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
+
+ buf = skb_put(skb, data_len);
+ for (i = 0; i < data_len; i++)
+ buf[i] = i & 0xff;
+
+ return skb;
+}
+
+static struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr,
+ const void *data,
+ size_t data_len)
+{
+ size_t hdr_len = sizeof(*hdr);
+ struct sk_buff *skb;
+
+ skb = alloc_skb(hdr_len + data_len, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
+ memcpy(skb_put(skb, data_len), data, data_len);
+
+ return skb;
+}
+
+#define mctp_test_create_skb_data(h, d) \
+ __mctp_test_create_skb_data(h, d, sizeof(*d))
+
+struct mctp_frag_test {
+ unsigned int mtu;
+ unsigned int msgsize;
+ unsigned int n_frags;
+};
+
+static void mctp_test_fragment(struct kunit *test)
+{
+ const struct mctp_frag_test *params;
+ int rc, i, n, mtu, msgsize;
+ struct mctp_test_route *rt;
+ struct sk_buff *skb;
+ struct mctp_hdr hdr;
+ u8 seq;
+
+ params = test->param_value;
+ mtu = params->mtu;
+ msgsize = params->msgsize;
+
+ hdr.ver = 1;
+ hdr.src = 8;
+ hdr.dest = 10;
+ hdr.flags_seq_tag = MCTP_HDR_FLAG_TO;
+
+ skb = mctp_test_create_skb(&hdr, msgsize);
+ KUNIT_ASSERT_TRUE(test, skb);
+
+ rt = mctp_test_create_route(&init_net, NULL, 10, mtu);
+ KUNIT_ASSERT_TRUE(test, rt);
+
+ /* The refcount would usually be incremented as part of a route lookup,
+ * but we're setting the route directly here.
+ */
+ refcount_inc(&rt->rt.refs);
+
+ rc = mctp_do_fragment_route(&rt->rt, skb, mtu, MCTP_TAG_OWNER);
+ KUNIT_EXPECT_FALSE(test, rc);
+
+ n = rt->pkts.qlen;
+
+ KUNIT_EXPECT_EQ(test, n, params->n_frags);
+
+ for (i = 0;; i++) {
+ struct mctp_hdr *hdr2;
+ struct sk_buff *skb2;
+ u8 tag_mask, seq2;
+ bool first, last;
+
+ first = i == 0;
+ last = i == (n - 1);
+
+ skb2 = skb_dequeue(&rt->pkts);
+
+ if (!skb2)
+ break;
+
+ hdr2 = mctp_hdr(skb2);
+
+ tag_mask = MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO;
+
+ KUNIT_EXPECT_EQ(test, hdr2->ver, hdr.ver);
+ KUNIT_EXPECT_EQ(test, hdr2->src, hdr.src);
+ KUNIT_EXPECT_EQ(test, hdr2->dest, hdr.dest);
+ KUNIT_EXPECT_EQ(test, hdr2->flags_seq_tag & tag_mask,
+ hdr.flags_seq_tag & tag_mask);
+
+ KUNIT_EXPECT_EQ(test,
+ !!(hdr2->flags_seq_tag & MCTP_HDR_FLAG_SOM), first);
+ KUNIT_EXPECT_EQ(test,
+ !!(hdr2->flags_seq_tag & MCTP_HDR_FLAG_EOM), last);
+
+ seq2 = (hdr2->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT) &
+ MCTP_HDR_SEQ_MASK;
+
+ if (first) {
+ seq = seq2;
+ } else {
+ seq++;
+ KUNIT_EXPECT_EQ(test, seq2, seq & MCTP_HDR_SEQ_MASK);
+ }
+
+ if (!last)
+ KUNIT_EXPECT_EQ(test, skb2->len, mtu);
+ else
+ KUNIT_EXPECT_LE(test, skb2->len, mtu);
+
+ kfree_skb(skb2);
+ }
+
+ mctp_test_route_destroy(test, rt);
+}
+
+static const struct mctp_frag_test mctp_frag_tests[] = {
+ {.mtu = 68, .msgsize = 63, .n_frags = 1},
+ {.mtu = 68, .msgsize = 64, .n_frags = 1},
+ {.mtu = 68, .msgsize = 65, .n_frags = 2},
+ {.mtu = 68, .msgsize = 66, .n_frags = 2},
+ {.mtu = 68, .msgsize = 127, .n_frags = 2},
+ {.mtu = 68, .msgsize = 128, .n_frags = 2},
+ {.mtu = 68, .msgsize = 129, .n_frags = 3},
+ {.mtu = 68, .msgsize = 130, .n_frags = 3},
+};
+
+static void mctp_frag_test_to_desc(const struct mctp_frag_test *t, char *desc)
+{
+ sprintf(desc, "mtu %d len %d -> %d frags",
+ t->msgsize, t->mtu, t->n_frags);
+}
+
+KUNIT_ARRAY_PARAM(mctp_frag, mctp_frag_tests, mctp_frag_test_to_desc);
+
+struct mctp_rx_input_test {
+ struct mctp_hdr hdr;
+ bool input;
+};
+
+static void mctp_test_rx_input(struct kunit *test)
+{
+ const struct mctp_rx_input_test *params;
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb;
+
+ params = test->param_value;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+
+ skb = mctp_test_create_skb(&params->hdr, 1);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+
+ __mctp_cb(skb);
+
+ mctp_pkttype_receive(skb, dev->ndev, &mctp_packet_type, NULL);
+
+ KUNIT_EXPECT_EQ(test, !!rt->pkts.qlen, params->input);
+
+ mctp_test_route_destroy(test, rt);
+ mctp_test_destroy_dev(dev);
+}
+
+#define RX_HDR(_ver, _src, _dest, _fst) \
+ { .ver = _ver, .src = _src, .dest = _dest, .flags_seq_tag = _fst }
+
+/* we have a route for EID 8 only */
+static const struct mctp_rx_input_test mctp_rx_input_tests[] = {
+ { .hdr = RX_HDR(1, 10, 8, 0), .input = true },
+ { .hdr = RX_HDR(1, 10, 9, 0), .input = false }, /* no input route */
+ { .hdr = RX_HDR(2, 10, 8, 0), .input = false }, /* invalid version */
+};
+
+static void mctp_rx_input_test_to_desc(const struct mctp_rx_input_test *t,
+ char *desc)
+{
+ sprintf(desc, "{%x,%x,%x,%x}", t->hdr.ver, t->hdr.src, t->hdr.dest,
+ t->hdr.flags_seq_tag);
+}
+
+KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests,
+ mctp_rx_input_test_to_desc);
+
+/* set up a local dev, route on EID 8, and a socket listening on type 0 */
+static void __mctp_route_test_init(struct kunit *test,
+ struct mctp_test_dev **devp,
+ struct mctp_test_route **rtp,
+ struct socket **sockp)
+{
+ struct sockaddr_mctp addr;
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct socket *sock;
+ int rc;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+
+ rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ addr.smctp_family = AF_MCTP;
+ addr.smctp_network = MCTP_NET_ANY;
+ addr.smctp_addr.s_addr = 8;
+ addr.smctp_type = 0;
+ rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr));
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ *rtp = rt;
+ *devp = dev;
+ *sockp = sock;
+}
+
+static void __mctp_route_test_fini(struct kunit *test,
+ struct mctp_test_dev *dev,
+ struct mctp_test_route *rt,
+ struct socket *sock)
+{
+ sock_release(sock);
+ mctp_test_route_destroy(test, rt);
+ mctp_test_destroy_dev(dev);
+}
+
+struct mctp_route_input_sk_test {
+ struct mctp_hdr hdr;
+ u8 type;
+ bool deliver;
+};
+
+static void mctp_test_route_input_sk(struct kunit *test)
+{
+ const struct mctp_route_input_sk_test *params;
+ struct sk_buff *skb, *skb2;
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct socket *sock;
+ int rc;
+
+ params = test->param_value;
+
+ __mctp_route_test_init(test, &dev, &rt, &sock);
+
+ skb = mctp_test_create_skb_data(&params->hdr, &params->type);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+
+ skb->dev = dev->ndev;
+ __mctp_cb(skb);
+
+ rc = mctp_route_input(&rt->rt, skb);
+
+ if (params->deliver) {
+ KUNIT_EXPECT_EQ(test, rc, 0);
+
+ skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
+ KUNIT_EXPECT_EQ(test, skb->len, 1);
+
+ skb_free_datagram(sock->sk, skb2);
+
+ } else {
+ KUNIT_EXPECT_NE(test, rc, 0);
+ skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ KUNIT_EXPECT_PTR_EQ(test, skb2, NULL);
+ }
+
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
+#define FL_S (MCTP_HDR_FLAG_SOM)
+#define FL_E (MCTP_HDR_FLAG_EOM)
+#define FL_T (MCTP_HDR_FLAG_TO)
+
+static const struct mctp_route_input_sk_test mctp_route_input_sk_tests[] = {
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 0, .deliver = true },
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 1, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_E | FL_T), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_T), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, 0), .type = 0, .deliver = false },
+};
+
+static void mctp_route_input_sk_to_desc(const struct mctp_route_input_sk_test *t,
+ char *desc)
+{
+ sprintf(desc, "{%x,%x,%x,%x} type %d", t->hdr.ver, t->hdr.src,
+ t->hdr.dest, t->hdr.flags_seq_tag, t->type);
+}
+
+KUNIT_ARRAY_PARAM(mctp_route_input_sk, mctp_route_input_sk_tests,
+ mctp_route_input_sk_to_desc);
+
+struct mctp_route_input_sk_reasm_test {
+ const char *name;
+ struct mctp_hdr hdrs[4];
+ int n_hdrs;
+ int rx_len;
+};
+
+static void mctp_test_route_input_sk_reasm(struct kunit *test)
+{
+ const struct mctp_route_input_sk_reasm_test *params;
+ struct sk_buff *skb, *skb2;
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct socket *sock;
+ int i, rc;
+ u8 c;
+
+ params = test->param_value;
+
+ __mctp_route_test_init(test, &dev, &rt, &sock);
+
+ for (i = 0; i < params->n_hdrs; i++) {
+ c = i;
+ skb = mctp_test_create_skb_data(&params->hdrs[i], &c);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+
+ skb->dev = dev->ndev;
+ __mctp_cb(skb);
+
+ rc = mctp_route_input(&rt->rt, skb);
+ }
+
+ skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+
+ if (params->rx_len) {
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
+ KUNIT_EXPECT_EQ(test, skb2->len, params->rx_len);
+ skb_free_datagram(sock->sk, skb2);
+
+ } else {
+ KUNIT_EXPECT_PTR_EQ(test, skb2, NULL);
+ }
+
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
+#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_T | (f) | ((s) << MCTP_HDR_SEQ_SHIFT))
+
+static const struct mctp_route_input_sk_reasm_test mctp_route_input_sk_reasm_tests[] = {
+ {
+ .name = "single packet",
+ .hdrs = {
+ RX_FRAG(FL_S | FL_E, 0),
+ },
+ .n_hdrs = 1,
+ .rx_len = 1,
+ },
+ {
+ .name = "single packet, offset seq",
+ .hdrs = {
+ RX_FRAG(FL_S | FL_E, 1),
+ },
+ .n_hdrs = 1,
+ .rx_len = 1,
+ },
+ {
+ .name = "start & end packets",
+ .hdrs = {
+ RX_FRAG(FL_S, 0),
+ RX_FRAG(FL_E, 1),
+ },
+ .n_hdrs = 2,
+ .rx_len = 2,
+ },
+ {
+ .name = "start & end packets, offset seq",
+ .hdrs = {
+ RX_FRAG(FL_S, 1),
+ RX_FRAG(FL_E, 2),
+ },
+ .n_hdrs = 2,
+ .rx_len = 2,
+ },
+ {
+ .name = "start & end packets, out of order",
+ .hdrs = {
+ RX_FRAG(FL_E, 1),
+ RX_FRAG(FL_S, 0),
+ },
+ .n_hdrs = 2,
+ .rx_len = 0,
+ },
+ {
+ .name = "start, middle & end packets",
+ .hdrs = {
+ RX_FRAG(FL_S, 0),
+ RX_FRAG(0, 1),
+ RX_FRAG(FL_E, 2),
+ },
+ .n_hdrs = 3,
+ .rx_len = 3,
+ },
+ {
+ .name = "missing seq",
+ .hdrs = {
+ RX_FRAG(FL_S, 0),
+ RX_FRAG(FL_E, 2),
+ },
+ .n_hdrs = 2,
+ .rx_len = 0,
+ },
+ {
+ .name = "seq wrap",
+ .hdrs = {
+ RX_FRAG(FL_S, 3),
+ RX_FRAG(FL_E, 0),
+ },
+ .n_hdrs = 2,
+ .rx_len = 2,
+ },
+};
+
+static void mctp_route_input_sk_reasm_to_desc(
+ const struct mctp_route_input_sk_reasm_test *t,
+ char *desc)
+{
+ sprintf(desc, "%s", t->name);
+}
+
+KUNIT_ARRAY_PARAM(mctp_route_input_sk_reasm, mctp_route_input_sk_reasm_tests,
+ mctp_route_input_sk_reasm_to_desc);
+
+static struct kunit_case mctp_test_cases[] = {
+ KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params),
+ KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params),
+ KUNIT_CASE_PARAM(mctp_test_route_input_sk, mctp_route_input_sk_gen_params),
+ KUNIT_CASE_PARAM(mctp_test_route_input_sk_reasm,
+ mctp_route_input_sk_reasm_gen_params),
+ {}
+};
+
+static struct kunit_suite mctp_test_suite = {
+ .name = "mctp",
+ .test_cases = mctp_test_cases,
+};
+
+kunit_test_suite(mctp_test_suite);
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c
new file mode 100644
index 000000000000..cc6b8803aa9d
--- /dev/null
+++ b/net/mctp/test/utils.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/netdevice.h>
+#include <linux/mctp.h>
+#include <linux/if_arp.h>
+
+#include <net/mctpdevice.h>
+#include <net/pkt_sched.h>
+
+#include "utils.h"
+
+static netdev_tx_t mctp_test_dev_tx(struct sk_buff *skb,
+ struct net_device *ndev)
+{
+ kfree(skb);
+ return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops mctp_test_netdev_ops = {
+ .ndo_start_xmit = mctp_test_dev_tx,
+};
+
+static void mctp_test_dev_setup(struct net_device *ndev)
+{
+ ndev->type = ARPHRD_MCTP;
+ ndev->mtu = MCTP_DEV_TEST_MTU;
+ ndev->hard_header_len = 0;
+ ndev->addr_len = 0;
+ ndev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+ ndev->flags = IFF_NOARP;
+ ndev->netdev_ops = &mctp_test_netdev_ops;
+ ndev->needs_free_netdev = true;
+}
+
+struct mctp_test_dev *mctp_test_create_dev(void)
+{
+ struct mctp_test_dev *dev;
+ struct net_device *ndev;
+ int rc;
+
+ ndev = alloc_netdev(sizeof(*dev), "mctptest%d", NET_NAME_ENUM,
+ mctp_test_dev_setup);
+ if (!ndev)
+ return NULL;
+
+ dev = netdev_priv(ndev);
+ dev->ndev = ndev;
+
+ rc = register_netdev(ndev);
+ if (rc) {
+ free_netdev(ndev);
+ return NULL;
+ }
+
+ rcu_read_lock();
+ dev->mdev = __mctp_dev_get(ndev);
+ mctp_dev_hold(dev->mdev);
+ rcu_read_unlock();
+
+ return dev;
+}
+
+void mctp_test_destroy_dev(struct mctp_test_dev *dev)
+{
+ mctp_dev_put(dev->mdev);
+ unregister_netdev(dev->ndev);
+}
diff --git a/net/mctp/test/utils.h b/net/mctp/test/utils.h
new file mode 100644
index 000000000000..df6aa1c03440
--- /dev/null
+++ b/net/mctp/test/utils.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NET_MCTP_TEST_UTILS_H
+#define __NET_MCTP_TEST_UTILS_H
+
+#include <kunit/test.h>
+
+#define MCTP_DEV_TEST_MTU 68
+
+struct mctp_test_dev {
+ struct net_device *ndev;
+ struct mctp_dev *mdev;
+};
+
+struct mctp_test_dev;
+
+struct mctp_test_dev *mctp_test_create_dev(void);
+void mctp_test_destroy_dev(struct mctp_test_dev *dev);
+
+#endif /* __NET_MCTP_TEST_UTILS_H */
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index b21ff9be04c6..3240b72271a7 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -72,6 +72,7 @@ bool mptcp_mib_alloc(struct net *net)
void mptcp_seq_show(struct seq_file *seq)
{
+ unsigned long sum[ARRAY_SIZE(mptcp_snmp_list) - 1];
struct net *net = seq->private;
int i;
@@ -81,17 +82,13 @@ void mptcp_seq_show(struct seq_file *seq)
seq_puts(seq, "\nMPTcpExt:");
- if (!net->mib.mptcp_statistics) {
- for (i = 0; mptcp_snmp_list[i].name; i++)
- seq_puts(seq, " 0");
-
- seq_putc(seq, '\n');
- return;
- }
+ memset(sum, 0, sizeof(sum));
+ if (net->mib.mptcp_statistics)
+ snmp_get_cpu_field_batch(sum, mptcp_snmp_list,
+ net->mib.mptcp_statistics);
for (i = 0; mptcp_snmp_list[i].name; i++)
- seq_printf(seq, " %lu",
- snmp_fold_field(net->mib.mptcp_statistics,
- mptcp_snmp_list[i].entry));
+ seq_printf(seq, " %lu", sum[i]);
+
seq_putc(seq, '\n');
}
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f48eb6315bbb..f44125dd6697 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -36,7 +36,7 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
struct sock *sk;
net = sock_net(in_skb->sk);
- msk = mptcp_token_get_sock(req->id.idiag_cookie[0]);
+ msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]);
if (!msk)
goto out_nosk;
@@ -113,37 +113,13 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_info *info = _info;
- u32 flags = 0;
- bool slow;
- u8 val;
r->idiag_rqueue = sk_rmem_alloc_get(sk);
r->idiag_wqueue = sk_wmem_alloc_get(sk);
if (!info)
return;
- slow = lock_sock_fast(sk);
- info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
- info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
- info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
- info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
- info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
- val = mptcp_pm_get_add_addr_signal_max(msk);
- info->mptcpi_add_addr_signal_max = val;
- val = mptcp_pm_get_add_addr_accept_max(msk);
- info->mptcpi_add_addr_accepted_max = val;
- info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
- if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
- flags |= MPTCP_INFO_FLAG_FALLBACK;
- if (READ_ONCE(msk->can_ack))
- flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
- info->mptcpi_flags = flags;
- info->mptcpi_token = READ_ONCE(msk->token);
- info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
- info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
- info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
- info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
- unlock_sock_fast(sk, slow);
+ mptcp_diag_fill_info(msk, info);
}
static const struct inet_diag_handler mptcp_diag_handler = {
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c41273cefc51..7c3420afb1a0 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -485,11 +485,11 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
mpext = mptcp_get_ext(skb);
data_len = mpext ? mpext->data_len : 0;
- /* we will check ext_copy.data_len in mptcp_write_options() to
+ /* we will check ops->data_len in mptcp_write_options() to
* discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
* TCPOLEN_MPTCP_MPC_ACK
*/
- opts->ext_copy.data_len = data_len;
+ opts->data_len = data_len;
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
@@ -505,9 +505,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
if (opts->csum_reqd) {
/* we need to propagate more info to csum the pseudo hdr */
- opts->ext_copy.data_seq = mpext->data_seq;
- opts->ext_copy.subflow_seq = mpext->subflow_seq;
- opts->ext_copy.csum = mpext->csum;
+ opts->data_seq = mpext->data_seq;
+ opts->subflow_seq = mpext->subflow_seq;
+ opts->csum = mpext->csum;
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*size = ALIGN(len, 4);
@@ -748,9 +748,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
/* can't send MP_PRIO with MPC, as they share the same option space:
* 'backup'. Also it makes no sense at all
*/
- if (!subflow->send_mp_prio ||
- ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
- OPTION_MPTCP_MPC_ACK) & opts->suboptions))
+ if (!subflow->send_mp_prio || (opts->suboptions & OPTIONS_MPTCP_MPC))
return false;
/* account for the trailing 'nop' option */
@@ -1019,11 +1017,9 @@ static void ack_update_msk(struct mptcp_sock *msk,
old_snd_una = msk->snd_una;
new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
- /* ACK for data not even sent yet and even above recovery bound? Ignore.*/
- if (unlikely(after64(new_snd_una, snd_nxt))) {
- if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
- new_snd_una = old_snd_una;
- }
+ /* ACK for data not even sent yet? Ignore.*/
+ if (unlikely(after64(new_snd_una, snd_nxt)))
+ new_snd_una = old_snd_una;
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
@@ -1227,7 +1223,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum)
{
struct csum_pseudo_header header;
__wsum csum;
@@ -1237,15 +1233,21 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
* always the 64-bit value, irrespective of what length is used in the
* DSS option itself.
*/
- header.data_seq = cpu_to_be64(mpext->data_seq);
- header.subflow_seq = htonl(mpext->subflow_seq);
- header.data_len = htons(mpext->data_len);
+ header.data_seq = cpu_to_be64(data_seq);
+ header.subflow_seq = htonl(subflow_seq);
+ header.data_len = htons(data_len);
header.csum = 0;
- csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+ csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum));
return (__force u16)csum_fold(csum);
}
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+ return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
+ mpext->csum);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
@@ -1329,15 +1331,14 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
}
- } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
- OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
+ } else if (OPTIONS_MPTCP_MPC & opts->suboptions) {
u8 len, flag = MPTCP_CAP_HMAC_SHA256;
if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYN;
} else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK;
- } else if (opts->ext_copy.data_len) {
+ } else if (opts->data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
if (opts->csum_reqd)
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
@@ -1366,14 +1367,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be64(opts->rcvr_key, ptr);
ptr += 2;
- if (!opts->ext_copy.data_len)
+ if (!opts->data_len)
goto mp_capable_done;
if (opts->csum_reqd) {
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
- mptcp_make_csum(&opts->ext_copy), ptr);
+ put_unaligned_be32(opts->data_len << 16 |
+ __mptcp_make_csum(opts->data_seq,
+ opts->subflow_seq,
+ opts->data_len,
+ opts->csum), ptr);
} else {
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ put_unaligned_be32(opts->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
ptr += 1;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index c4f9a5ce3815..7b96be1e9f14 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -654,9 +654,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
}
}
-int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr,
- u8 bkup)
+static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ u8 bkup)
{
struct mptcp_subflow_context *subflow;
@@ -1718,9 +1718,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
list_for_each_entry(entry, &pernet->local_addr_list, list) {
if (addresses_equal(&entry->addr, &addr.addr, true)) {
- ret = mptcp_nl_addr_backup(net, &entry->addr, bkup);
- if (ret)
- return ret;
+ mptcp_nl_addr_backup(net, &entry->addr, bkup);
if (bkup)
entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
@@ -2054,6 +2052,9 @@ static int __net_init pm_nl_init_net(struct net *net)
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
+
+ /* Cit. 2 subflows ought to be enough for anybody. */
+ pernet->subflows_max = 2;
pernet->next_id = 1;
pernet->stale_loss_cnt = 4;
spin_lock_init(&pernet->lock);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2602f1386160..b7e32e316738 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -126,6 +126,11 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
+static void mptcp_rmem_charge(struct sock *sk, int size)
+{
+ mptcp_sk(sk)->rmem_fwd_alloc -= size;
+}
+
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
struct sk_buff *from)
{
@@ -142,7 +147,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
kfree_skb_partial(from, fragstolen);
atomic_add(delta, &sk->sk_rmem_alloc);
- sk_mem_charge(sk, delta);
+ mptcp_rmem_charge(sk, delta);
return true;
}
@@ -155,6 +160,44 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
return mptcp_try_coalesce((struct sock *)msk, to, from);
}
+static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
+{
+ amount >>= SK_MEM_QUANTUM_SHIFT;
+ mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ __sk_mem_reduce_allocated(sk, amount);
+}
+
+static void mptcp_rmem_uncharge(struct sock *sk, int size)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int reclaimable;
+
+ msk->rmem_fwd_alloc += size;
+ reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
+
+ /* see sk_mem_uncharge() for the rationale behind the following schema */
+ if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
+ __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK);
+}
+
+static void mptcp_rfree(struct sk_buff *skb)
+{
+ unsigned int len = skb->truesize;
+ struct sock *sk = skb->sk;
+
+ atomic_sub(len, &sk->sk_rmem_alloc);
+ mptcp_rmem_uncharge(sk, len);
+}
+
+static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = mptcp_rfree;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ mptcp_rmem_charge(sk, skb->truesize);
+}
+
/* "inspired" by tcp_data_queue_ofo(), main differences:
* - use mptcp seqs
* - don't cope with sacks
@@ -267,7 +310,29 @@ merge_right:
end:
skb_condense(skb);
- skb_set_owner_r(skb, sk);
+ mptcp_set_owner_r(skb, sk);
+}
+
+static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int amt, amount;
+
+ if (size < msk->rmem_fwd_alloc)
+ return true;
+
+ amt = sk_mem_pages(size);
+ amount = amt << SK_MEM_QUANTUM_SHIFT;
+ msk->rmem_fwd_alloc += amount;
+ if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) {
+ if (ssk->sk_forward_alloc < amount) {
+ msk->rmem_fwd_alloc -= amount;
+ return false;
+ }
+
+ ssk->sk_forward_alloc -= amount;
+ }
+ return true;
}
static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
@@ -285,15 +350,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
skb_orphan(skb);
/* try to fetch required memory from subflow */
- if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
- int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
-
- if (ssk->sk_forward_alloc < amount)
- goto drop;
-
- ssk->sk_forward_alloc -= amount;
- sk->sk_forward_alloc += amount;
- }
+ if (!mptcp_rmem_schedule(sk, ssk, skb->truesize))
+ goto drop;
has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
@@ -313,7 +371,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
if (tail && mptcp_try_coalesce(sk, tail, skb))
return true;
- skb_set_owner_r(skb, sk);
+ mptcp_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
return true;
} else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
@@ -528,7 +586,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
sk->sk_shutdown |= RCV_SHUTDOWN;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
@@ -742,10 +799,9 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
- if (move_skbs_to_msk(msk, ssk)) {
- set_bit(MPTCP_DATA_READY, &msk->flags);
+ if (move_skbs_to_msk(msk, ssk))
sk->sk_data_ready(sk);
- }
+
mptcp_data_unlock(sk);
}
@@ -847,7 +903,6 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
sk->sk_shutdown |= RCV_SHUTDOWN;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
sk->sk_data_ready(sk);
}
@@ -911,124 +966,20 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
df->data_seq + df->data_len == msk->write_seq;
}
-static int mptcp_wmem_with_overhead(int size)
-{
- return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
-}
-
-static void __mptcp_wmem_reserve(struct sock *sk, int size)
-{
- int amount = mptcp_wmem_with_overhead(size);
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- WARN_ON_ONCE(msk->wmem_reserved);
- if (WARN_ON_ONCE(amount < 0))
- amount = 0;
-
- if (amount <= sk->sk_forward_alloc)
- goto reserve;
-
- /* under memory pressure try to reserve at most a single page
- * otherwise try to reserve the full estimate and fallback
- * to a single page before entering the error path
- */
- if ((tcp_under_memory_pressure(sk) && amount > PAGE_SIZE) ||
- !sk_wmem_schedule(sk, amount)) {
- if (amount <= PAGE_SIZE)
- goto nomem;
-
- amount = PAGE_SIZE;
- if (!sk_wmem_schedule(sk, amount))
- goto nomem;
- }
-
-reserve:
- msk->wmem_reserved = amount;
- sk->sk_forward_alloc -= amount;
- return;
-
-nomem:
- /* we will wait for memory on next allocation */
- msk->wmem_reserved = -1;
-}
-
-static void __mptcp_update_wmem(struct sock *sk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
-#ifdef CONFIG_LOCKDEP
- WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
-#endif
-
- if (!msk->wmem_reserved)
- return;
-
- if (msk->wmem_reserved < 0)
- msk->wmem_reserved = 0;
- if (msk->wmem_reserved > 0) {
- sk->sk_forward_alloc += msk->wmem_reserved;
- msk->wmem_reserved = 0;
- }
-}
-
-static bool mptcp_wmem_alloc(struct sock *sk, int size)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- /* check for pre-existing error condition */
- if (msk->wmem_reserved < 0)
- return false;
-
- if (msk->wmem_reserved >= size)
- goto account;
-
- mptcp_data_lock(sk);
- if (!sk_wmem_schedule(sk, size)) {
- mptcp_data_unlock(sk);
- return false;
- }
-
- sk->sk_forward_alloc -= size;
- msk->wmem_reserved += size;
- mptcp_data_unlock(sk);
-
-account:
- msk->wmem_reserved -= size;
- return true;
-}
-
-static void mptcp_wmem_uncharge(struct sock *sk, int size)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- if (msk->wmem_reserved < 0)
- msk->wmem_reserved = 0;
- msk->wmem_reserved += size;
-}
-
static void __mptcp_mem_reclaim_partial(struct sock *sk)
{
+ int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
+
lockdep_assert_held_once(&sk->sk_lock.slock);
- __mptcp_update_wmem(sk);
+
+ __mptcp_rmem_reclaim(sk, reclaimable - 1);
sk_mem_reclaim_partial(sk);
}
static void mptcp_mem_reclaim_partial(struct sock *sk)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- /* if we are experiencing a transint allocation error,
- * the forward allocation memory has been already
- * released
- */
- if (msk->wmem_reserved < 0)
- return;
-
mptcp_data_lock(sk);
- sk->sk_forward_alloc += msk->wmem_reserved;
- sk_mem_reclaim_partial(sk);
- msk->wmem_reserved = sk->sk_forward_alloc;
- sk->sk_forward_alloc = 0;
+ __mptcp_mem_reclaim_partial(sk);
mptcp_data_unlock(sk);
}
@@ -1107,7 +1058,8 @@ out:
if (cleaned && tcp_under_memory_pressure(sk))
__mptcp_mem_reclaim_partial(sk);
- if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
+ if (snd_una == READ_ONCE(msk->snd_nxt) &&
+ snd_una == READ_ONCE(msk->write_seq)) {
if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
mptcp_stop_timer(sk);
} else {
@@ -1117,9 +1069,8 @@ out:
static void __mptcp_clean_una_wakeup(struct sock *sk)
{
-#ifdef CONFIG_LOCKDEP
- WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
-#endif
+ lockdep_assert_held_once(&sk->sk_lock.slock);
+
__mptcp_clean_una(sk);
mptcp_write_space(sk);
}
@@ -1223,7 +1174,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
if (likely(skb)) {
if (likely(__mptcp_add_ext(skb, gfp))) {
skb_reserve(skb, MAX_TCP_HEADER);
- skb->reserved_tailroom = skb->end - skb->tail;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
return skb;
}
__kfree_skb(skb);
@@ -1233,31 +1185,23 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
return NULL;
}
-static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
+static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
{
struct sk_buff *skb;
- if (ssk->sk_tx_skb_cache) {
- skb = ssk->sk_tx_skb_cache;
- if (unlikely(!skb_ext_find(skb, SKB_EXT_MPTCP) &&
- !__mptcp_add_ext(skb, gfp)))
- return false;
- return true;
- }
-
skb = __mptcp_do_alloc_tx_skb(sk, gfp);
if (!skb)
- return false;
+ return NULL;
if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
- ssk->sk_tx_skb_cache = skb;
- return true;
+ tcp_skb_entail(ssk, skb);
+ return skb;
}
kfree_skb(skb);
- return false;
+ return NULL;
}
-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
+static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
{
gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
@@ -1287,23 +1231,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_sendmsg_info *info)
{
u64 data_seq = dfrag->data_seq + info->sent;
+ int offset = dfrag->offset + info->sent;
struct mptcp_sock *msk = mptcp_sk(sk);
bool zero_window_probe = false;
struct mptcp_ext *mpext = NULL;
- struct sk_buff *skb, *tail;
- bool must_collapse = false;
- int size_bias = 0;
- int avail_size;
- size_t ret = 0;
+ bool can_coalesce = false;
+ bool reuse_skb = true;
+ struct sk_buff *skb;
+ size_t copy;
+ int i;
pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u",
msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
+ if (WARN_ON_ONCE(info->sent > info->limit ||
+ info->limit > dfrag->data_len))
+ return 0;
+
/* compute send limit */
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
- avail_size = info->size_goal;
+ copy = info->size_goal;
+
skb = tcp_write_queue_tail(ssk);
- if (skb) {
+ if (skb && copy > skb->len) {
/* Limit the write to the size available in the
* current skb, if any, so that we create at most a new skb.
* Explicitly tells TCP internals to avoid collapsing on later
@@ -1316,62 +1266,79 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
goto alloc_skb;
}
- must_collapse = (info->size_goal - skb->len > 0) &&
- (skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
- if (must_collapse) {
- size_bias = skb->len;
- avail_size = info->size_goal - skb->len;
+ i = skb_shinfo(skb)->nr_frags;
+ can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
+ if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ tcp_mark_push(tcp_sk(ssk), skb);
+ goto alloc_skb;
}
- }
+ copy -= skb->len;
+ } else {
alloc_skb:
- if (!must_collapse && !ssk->sk_tx_skb_cache &&
- !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
- return 0;
+ skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held);
+ if (!skb)
+ return -ENOMEM;
+
+ i = skb_shinfo(skb)->nr_frags;
+ reuse_skb = false;
+ mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+ }
/* Zero window and all data acked? Probe. */
- avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
- if (avail_size == 0) {
+ copy = mptcp_check_allowed_size(msk, data_seq, copy);
+ if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
- if (skb || snd_una != msk->snd_nxt)
+ if (snd_una != msk->snd_nxt) {
+ tcp_remove_empty_skb(ssk);
return 0;
+ }
+
zero_window_probe = true;
data_seq = snd_una - 1;
- avail_size = 1;
- }
+ copy = 1;
- if (WARN_ON_ONCE(info->sent > info->limit ||
- info->limit > dfrag->data_len))
- return 0;
+ /* all mptcp-level data is acked, no skbs should be present into the
+ * ssk write queue
+ */
+ WARN_ON_ONCE(reuse_skb);
+ }
- ret = info->limit - info->sent;
- tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags,
- dfrag->page, dfrag->offset + info->sent, &ret);
- if (!tail) {
- tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
+ copy = min_t(size_t, copy, info->limit - info->sent);
+ if (!sk_wmem_schedule(ssk, copy)) {
+ tcp_remove_empty_skb(ssk);
return -ENOMEM;
}
- /* if the tail skb is still the cached one, collapsing really happened.
- */
- if (skb == tail) {
- TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
- mpext->data_len += ret;
+ if (can_coalesce) {
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+ } else {
+ get_page(dfrag->page);
+ skb_fill_page_desc(skb, i, dfrag->page, offset, copy);
+ }
+
+ skb->len += copy;
+ skb->data_len += copy;
+ skb->truesize += copy;
+ sk_wmem_queued_add(ssk, copy);
+ sk_mem_charge(ssk, copy);
+ WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
+ TCP_SKB_CB(skb)->end_seq += copy;
+ tcp_skb_pcount_set(skb, 0);
+
+ /* on skb reuse we just need to update the DSS len */
+ if (reuse_skb) {
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+ mpext->data_len += copy;
WARN_ON_ONCE(zero_window_probe);
goto out;
}
- mpext = skb_ext_find(tail, SKB_EXT_MPTCP);
- if (WARN_ON_ONCE(!mpext)) {
- /* should never reach here, stream corrupted */
- return -EINVAL;
- }
-
memset(mpext, 0, sizeof(*mpext));
mpext->data_seq = data_seq;
mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
- mpext->data_len = ret;
+ mpext->data_len = copy;
mpext->use_map = 1;
mpext->dsn64 = 1;
@@ -1380,18 +1347,18 @@ alloc_skb:
mpext->dsn64);
if (zero_window_probe) {
- mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
+ mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
mpext->frozen = 1;
if (READ_ONCE(msk->csum_enabled))
- mptcp_update_data_checksum(tail, ret);
+ mptcp_update_data_checksum(skb, copy);
tcp_push_pending_frames(ssk);
return 0;
}
out:
if (READ_ONCE(msk->csum_enabled))
- mptcp_update_data_checksum(tail, ret);
- mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
- return ret;
+ mptcp_update_data_checksum(skb, copy);
+ mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
+ return copy;
}
#define MPTCP_SEND_BURST_SIZE ((1 << 16) - \
@@ -1501,13 +1468,44 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
return NULL;
}
-static void mptcp_push_release(struct sock *sk, struct sock *ssk,
- struct mptcp_sendmsg_info *info)
+static void mptcp_push_release(struct sock *ssk, struct mptcp_sendmsg_info *info)
{
tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
release_sock(ssk);
}
+static void mptcp_update_post_push(struct mptcp_sock *msk,
+ struct mptcp_data_frag *dfrag,
+ u32 sent)
+{
+ u64 snd_nxt_new = dfrag->data_seq;
+
+ dfrag->already_sent += sent;
+
+ msk->snd_burst -= sent;
+
+ snd_nxt_new += dfrag->already_sent;
+
+ /* snd_nxt_new can be smaller than snd_nxt in case mptcp
+ * is recovering after a failover. In that event, this re-sends
+ * old segments.
+ *
+ * Thus compute snd_nxt_new candidate based on
+ * the dfrag->data_seq that was sent and the data
+ * that has been handed to the subflow for transmission
+ * and skip update in case it was old dfrag.
+ */
+ if (likely(after64(snd_nxt_new, msk->snd_nxt)))
+ msk->snd_nxt = snd_nxt_new;
+}
+
+static void mptcp_check_and_set_pending(struct sock *sk)
+{
+ if (mptcp_send_head(sk) &&
+ !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
+ set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+}
+
void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
struct sock *prev_ssk = NULL, *ssk = NULL;
@@ -1533,7 +1531,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
* the last round, release prev_ssk
*/
if (ssk != prev_ssk && prev_ssk)
- mptcp_push_release(sk, prev_ssk, &info);
+ mptcp_push_release(prev_ssk, &info);
if (!ssk)
goto out;
@@ -1546,24 +1544,22 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0) {
- mptcp_push_release(sk, ssk, &info);
+ mptcp_push_release(ssk, &info);
goto out;
}
info.sent += ret;
- dfrag->already_sent += ret;
- msk->snd_nxt += ret;
- msk->snd_burst -= ret;
- msk->tx_pending_data -= ret;
copied += ret;
len -= ret;
+
+ mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
/* at this point we held the socket lock for the last subflow we used */
if (ssk)
- mptcp_push_release(sk, ssk, &info);
+ mptcp_push_release(ssk, &info);
out:
/* ensure the rtx timer is running */
@@ -1609,13 +1605,11 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
goto out;
info.sent += ret;
- dfrag->already_sent += ret;
- msk->snd_nxt += ret;
- msk->snd_burst -= ret;
- msk->tx_pending_data -= ret;
copied += ret;
len -= ret;
first = false;
+
+ mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
@@ -1624,7 +1618,6 @@ out:
/* __mptcp_alloc_tx_skb could have released some wmem and we are
* not going to flush it via release_sock()
*/
- __mptcp_update_wmem(sk);
if (copied) {
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
@@ -1661,7 +1654,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* silently ignore everything else */
msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
- mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len)));
+ lock_sock(sk);
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -1709,23 +1702,22 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
psize = min_t(size_t, psize, msg_data_left(msg));
total_ts = psize + frag_truesize;
- if (!mptcp_wmem_alloc(sk, total_ts))
+ if (!sk_wmem_schedule(sk, total_ts))
goto wait_for_memory;
if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
- mptcp_wmem_uncharge(sk, psize + frag_truesize);
ret = -EFAULT;
goto out;
}
/* data successfully copied into the write queue */
+ sk->sk_forward_alloc -= total_ts;
copied += psize;
dfrag->data_len += psize;
frag_truesize += psize;
pfrag->offset += frag_truesize;
WRITE_ONCE(msk->write_seq, msk->write_seq + psize);
- msk->tx_pending_data += psize;
/* charge data on mptcp pending queue to the msk socket
* Note: we charge such data both to sk and ssk
@@ -1759,21 +1751,6 @@ out:
return copied ? : ret;
}
-static void mptcp_wait_data(struct sock *sk, long *timeo)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
- sk_wait_event(sk, timeo,
- test_bit(MPTCP_DATA_READY, &msk->flags), &wait);
-
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
-}
-
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
struct msghdr *msg,
size_t len, int flags,
@@ -1932,7 +1909,7 @@ static void __mptcp_update_rmem(struct sock *sk)
return;
atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
- sk_mem_uncharge(sk, msk->rmem_released);
+ mptcp_rmem_uncharge(sk, msk->rmem_released);
WRITE_ONCE(msk->rmem_released, 0);
}
@@ -2000,7 +1977,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
- mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk));
+ lock_sock(sk);
if (unlikely(sk->sk_state == TCP_LISTEN)) {
copied = -ENOTCONN;
goto out_err;
@@ -2077,19 +2054,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
pr_debug("block timeout %ld", timeo);
- mptcp_wait_data(sk, &timeo);
- }
-
- if (skb_queue_empty_lockless(&sk->sk_receive_queue) &&
- skb_queue_empty(&msk->receive_queue)) {
- /* entire backlog drained, clear DATA_READY. */
- clear_bit(MPTCP_DATA_READY, &msk->flags);
-
- /* .. race-breaker: ssk might have gotten new data
- * after last __mptcp_move_skbs() returned false.
- */
- if (unlikely(__mptcp_move_skbs(msk)))
- set_bit(MPTCP_DATA_READY, &msk->flags);
+ sk_wait_data(sk, &timeo, NULL);
}
out_err:
@@ -2098,9 +2063,9 @@ out_err:
tcp_recv_timestamp(msg, sk, &tss);
}
- pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
- msk, test_bit(MPTCP_DATA_READY, &msk->flags),
- skb_queue_empty_lockless(&sk->sk_receive_queue), copied);
+ pr_debug("msk=%p rx queue empty=%d:%d copied=%d",
+ msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
+ skb_queue_empty(&msk->receive_queue), copied);
if (!(flags & MSG_PEEK))
mptcp_rcv_space_adjust(msk, copied);
@@ -2213,15 +2178,11 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
return false;
}
- /* will accept ack for reijected data before re-sending them */
- if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
- msk->recovery_snd_nxt = msk->snd_nxt;
+ msk->recovery_snd_nxt = msk->snd_nxt;
msk->recovery = true;
mptcp_data_unlock(sk);
msk->first_pending = rtx_head;
- msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
- msk->snd_nxt = rtx_head->data_seq;
msk->snd_burst = 0;
/* be sure to clear the "sent status" on all re-injected fragments */
@@ -2368,7 +2329,6 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
inet_sk_state_store(sk, TCP_CLOSE);
sk->sk_shutdown = SHUTDOWN_MASK;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
mptcp_close_wake_up(sk);
@@ -2384,6 +2344,9 @@ static void __mptcp_retrans(struct sock *sk)
int ret;
mptcp_clean_una_wakeup(sk);
+
+ /* first check ssk: need to kick "stale" logic */
+ ssk = mptcp_subflow_get_retrans(msk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) {
@@ -2396,10 +2359,12 @@ static void __mptcp_retrans(struct sock *sk)
goto reset_timer;
}
- return;
+ if (!mptcp_send_head(sk))
+ return;
+
+ goto reset_timer;
}
- ssk = mptcp_subflow_get_retrans(msk);
if (!ssk)
goto reset_timer;
@@ -2426,6 +2391,8 @@ static void __mptcp_retrans(struct sock *sk)
release_sock(ssk);
reset_timer:
+ mptcp_check_and_set_pending(sk);
+
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
}
@@ -2490,9 +2457,8 @@ static int __mptcp_init_sock(struct sock *sk)
__skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- msk->wmem_reserved = 0;
+ msk->rmem_fwd_alloc = 0;
WRITE_ONCE(msk->rmem_released, 0);
- msk->tx_pending_data = 0;
msk->timer_ival = TCP_RTO_MIN;
msk->first = NULL;
@@ -2702,7 +2668,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk);
- WARN_ON_ONCE(msk->wmem_reserved);
+ WARN_ON_ONCE(msk->rmem_fwd_alloc);
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
@@ -2735,7 +2701,7 @@ cleanup:
inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- bool slow = lock_sock_fast(ssk);
+ bool slow = lock_sock_fast_nested(ssk);
sock_orphan(ssk);
unlock_sock_fast(ssk, slow);
@@ -2935,8 +2901,14 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
/* move to sk_receive_queue, sk_stream_kill_queues will purge it */
skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
-
+ __skb_queue_purge(&sk->sk_receive_queue);
skb_rbtree_purge(&msk->out_of_order_queue);
+
+ /* move all the rx fwd alloc into the sk_mem_reclaim_final in
+ * inet_sock_destruct() will dispose it
+ */
+ sk->sk_forward_alloc += msk->rmem_fwd_alloc;
+ msk->rmem_fwd_alloc = 0;
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
}
@@ -3018,10 +2990,6 @@ static void mptcp_release_cb(struct sock *sk)
if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
__mptcp_error_report(sk);
- /* push_pending may touch wmem_reserved, ensure we do the cleanup
- * later
- */
- __mptcp_update_wmem(sk);
__mptcp_update_rmem(sk);
}
@@ -3171,6 +3139,11 @@ static void mptcp_shutdown(struct sock *sk, int how)
__mptcp_wr_shutdown(sk);
}
+static int mptcp_forward_alloc_get(const struct sock *sk)
+{
+ return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+}
+
static struct proto mptcp_prot = {
.name = "MPTCP",
.owner = THIS_MODULE,
@@ -3188,6 +3161,7 @@ static struct proto mptcp_prot = {
.hash = mptcp_hash,
.unhash = mptcp_unhash,
.get_port = mptcp_get_port,
+ .forward_alloc_get = mptcp_forward_alloc_get,
.sockets_allocated = &mptcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
@@ -3385,8 +3359,14 @@ unlock_fail:
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
- return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM :
- 0;
+ /* Concurrent splices from sk_receive_queue into receive_queue will
+ * always show at least one non-empty queue when checked in this order.
+ */
+ if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
+ skb_queue_empty_lockless(&msk->receive_queue))
+ return 0;
+
+ return EPOLLIN | EPOLLRDNORM;
}
static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
@@ -3421,7 +3401,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN)
- return mptcp_check_readable(msk);
+ return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : 0;
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d3e6fd1615f1..67a61ac48b20 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -227,7 +227,7 @@ struct mptcp_sock {
u64 ack_seq;
u64 rcv_wnd_sent;
u64 rcv_data_fin_seq;
- int wmem_reserved;
+ int rmem_fwd_alloc;
struct sock *last_snd;
int snd_burst;
int old_wspace;
@@ -254,7 +254,6 @@ struct mptcp_sock {
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
struct sk_buff_head receive_queue;
- int tx_pending_data;
struct list_head conn_list;
struct list_head rtx_queue;
struct mptcp_data_frag *first_pending;
@@ -273,19 +272,6 @@ struct mptcp_sock {
char ca_name[TCP_CA_NAME_MAX];
};
-#define mptcp_lock_sock(___sk, cb) do { \
- struct sock *__sk = (___sk); /* silence macro reuse warning */ \
- might_sleep(); \
- spin_lock_bh(&__sk->sk_lock.slock); \
- if (__sk->sk_lock.owned) \
- __lock_sock(__sk); \
- cb; \
- __sk->sk_lock.owned = 1; \
- spin_unlock(&__sk->sk_lock.slock); \
- mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_); \
- local_bh_enable(); \
-} while (0)
-
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
#define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock)
@@ -709,7 +695,7 @@ int mptcp_token_new_connect(struct sock *sk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
-struct mptcp_sock *mptcp_token_get_sock(u32 token);
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
long *s_num);
void mptcp_token_destroy(struct mptcp_sock *msk);
@@ -737,9 +723,6 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
-int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr,
- u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 8c03afac5ca0..0f1e661c2032 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -14,6 +14,8 @@
#include <net/mptcp.h>
#include "protocol.h"
+#define MIN_INFO_OPTLEN_SIZE 16
+
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
sock_owned_by_me((const struct sock *)msk);
@@ -670,6 +672,266 @@ out:
return ret;
}
+void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
+{
+ struct sock *sk = &msk->sk.icsk_inet.sk;
+ u32 flags = 0;
+ bool slow;
+ u8 val;
+
+ memset(info, 0, sizeof(*info));
+
+ slow = lock_sock_fast(sk);
+
+ info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
+ info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
+ info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
+ info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
+ info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
+ val = mptcp_pm_get_add_addr_signal_max(msk);
+ info->mptcpi_add_addr_signal_max = val;
+ val = mptcp_pm_get_add_addr_accept_max(msk);
+ info->mptcpi_add_addr_accepted_max = val;
+ info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
+ if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
+ flags |= MPTCP_INFO_FLAG_FALLBACK;
+ if (READ_ONCE(msk->can_ack))
+ flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
+ info->mptcpi_flags = flags;
+ info->mptcpi_token = READ_ONCE(msk->token);
+ info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
+ info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
+ info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
+ info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
+
+ unlock_sock_fast(sk, slow);
+}
+EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
+
+static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen)
+{
+ struct mptcp_info m_info;
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ len = min_t(unsigned int, len, sizeof(struct mptcp_info));
+
+ mptcp_diag_fill_info(msk, &m_info);
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ if (copy_to_user(optval, &m_info, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
+ char __user *optval,
+ u32 copied,
+ int __user *optlen)
+{
+ u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd));
+
+ if (copied)
+ copied += sfd->size_subflow_data;
+ else
+ copied = copylen;
+
+ if (put_user(copied, optlen))
+ return -EFAULT;
+
+ if (copy_to_user(optval, sfd, copylen))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
+ char __user *optval, int __user *optlen)
+{
+ int len, copylen;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ /* if mptcp_subflow_data size is changed, need to adjust
+ * this function to deal with programs using old version.
+ */
+ BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE);
+
+ if (len < MIN_INFO_OPTLEN_SIZE)
+ return -EINVAL;
+
+ memset(sfd, 0, sizeof(*sfd));
+
+ copylen = min_t(unsigned int, len, sizeof(*sfd));
+ if (copy_from_user(sfd, optval, copylen))
+ return -EFAULT;
+
+ /* size_subflow_data is u32, but len is signed */
+ if (sfd->size_subflow_data > INT_MAX ||
+ sfd->size_user > INT_MAX)
+ return -EINVAL;
+
+ if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
+ sfd->size_subflow_data > len)
+ return -EINVAL;
+
+ if (sfd->num_subflows || sfd->size_kernel)
+ return -EINVAL;
+
+ return len - sfd->size_subflow_data;
+}
+
+static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
+ int __user *optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = &msk->sk.icsk_inet.sk;
+ unsigned int sfcount = 0, copied = 0;
+ struct mptcp_subflow_data sfd;
+ char __user *infoptr;
+ int len;
+
+ len = mptcp_get_subflow_data(&sfd, optval, optlen);
+ if (len < 0)
+ return len;
+
+ sfd.size_kernel = sizeof(struct tcp_info);
+ sfd.size_user = min_t(unsigned int, sfd.size_user,
+ sizeof(struct tcp_info));
+
+ infoptr = optval + sfd.size_subflow_data;
+
+ lock_sock(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ ++sfcount;
+
+ if (len && len >= sfd.size_user) {
+ struct tcp_info info;
+
+ tcp_get_info(ssk, &info);
+
+ if (copy_to_user(infoptr, &info, sfd.size_user)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+
+ infoptr += sfd.size_user;
+ copied += sfd.size_user;
+ len -= sfd.size_user;
+ }
+ }
+
+ release_sock(sk);
+
+ sfd.num_subflows = sfcount;
+
+ if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
+ return -EFAULT;
+
+ return 0;
+}
+
+static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ memset(a, 0, sizeof(*a));
+
+ if (sk->sk_family == AF_INET) {
+ a->sin_local.sin_family = AF_INET;
+ a->sin_local.sin_port = inet->inet_sport;
+ a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr;
+
+ if (!a->sin_local.sin_addr.s_addr)
+ a->sin_local.sin_addr.s_addr = inet->inet_saddr;
+
+ a->sin_remote.sin_family = AF_INET;
+ a->sin_remote.sin_port = inet->inet_dport;
+ a->sin_remote.sin_addr.s_addr = inet->inet_daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (WARN_ON_ONCE(!np))
+ return;
+
+ a->sin6_local.sin6_family = AF_INET6;
+ a->sin6_local.sin6_port = inet->inet_sport;
+
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ a->sin6_local.sin6_addr = np->saddr;
+ else
+ a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr;
+
+ a->sin6_remote.sin6_family = AF_INET6;
+ a->sin6_remote.sin6_port = inet->inet_dport;
+ a->sin6_remote.sin6_addr = sk->sk_v6_daddr;
+#endif
+ }
+}
+
+static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval,
+ int __user *optlen)
+{
+ struct sock *sk = &msk->sk.icsk_inet.sk;
+ struct mptcp_subflow_context *subflow;
+ unsigned int sfcount = 0, copied = 0;
+ struct mptcp_subflow_data sfd;
+ char __user *addrptr;
+ int len;
+
+ len = mptcp_get_subflow_data(&sfd, optval, optlen);
+ if (len < 0)
+ return len;
+
+ sfd.size_kernel = sizeof(struct mptcp_subflow_addrs);
+ sfd.size_user = min_t(unsigned int, sfd.size_user,
+ sizeof(struct mptcp_subflow_addrs));
+
+ addrptr = optval + sfd.size_subflow_data;
+
+ lock_sock(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ ++sfcount;
+
+ if (len && len >= sfd.size_user) {
+ struct mptcp_subflow_addrs a;
+
+ mptcp_get_sub_addrs(ssk, &a);
+
+ if (copy_to_user(addrptr, &a, sfd.size_user)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+
+ addrptr += sfd.size_user;
+ copied += sfd.size_user;
+ len -= sfd.size_user;
+ }
+ }
+
+ release_sock(sk);
+
+ sfd.num_subflows = sfcount;
+
+ if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
+ return -EFAULT;
+
+ return 0;
+}
+
static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
char __user *optval, int __user *optlen)
{
@@ -684,6 +946,21 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
}
+static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
+ char __user *optval, int __user *optlen)
+{
+ switch (optname) {
+ case MPTCP_INFO:
+ return mptcp_getsockopt_info(msk, optval, optlen);
+ case MPTCP_TCPINFO:
+ return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
+ case MPTCP_SUBFLOW_ADDRS:
+ return mptcp_getsockopt_subflow_addrs(msk, optval, optlen);
+ }
+
+ return -EOPNOTSUPP;
+}
+
int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option)
{
@@ -706,6 +983,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
if (level == SOL_TCP)
return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
+ if (level == SOL_MPTCP)
+ return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option);
return -EOPNOTSUPP;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1de7ce883c37..6172f380dfb7 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -86,7 +86,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
struct mptcp_sock *msk;
int local_id;
- msk = mptcp_token_get_sock(subflow_req->token);
+ msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token);
if (!msk) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
return NULL;
diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c
index 37127781aee9..7f22526346a7 100644
--- a/net/mptcp/syncookies.c
+++ b/net/mptcp/syncookies.c
@@ -108,18 +108,12 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
e->valid = 0;
- msk = mptcp_token_get_sock(e->token);
+ msk = mptcp_token_get_sock(net, e->token);
if (!msk) {
spin_unlock_bh(&join_entry_locks[i]);
return false;
}
- /* If this fails, the token got re-used in the mean time by another
- * mptcp socket in a different netns, i.e. entry is outdated.
- */
- if (!net_eq(sock_net((struct sock *)msk), net))
- goto err_put;
-
subflow_req->remote_nonce = e->remote_nonce;
subflow_req->local_nonce = e->local_nonce;
subflow_req->backup = e->backup;
@@ -128,11 +122,6 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
subflow_req->msk = msk;
spin_unlock_bh(&join_entry_locks[i]);
return true;
-
-err_put:
- spin_unlock_bh(&join_entry_locks[i]);
- sock_put((struct sock *)msk);
- return false;
}
void __init mptcp_join_cookie_init(void)
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index a98e554b034f..e581b341c5be 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -231,6 +231,7 @@ found:
/**
* mptcp_token_get_sock - retrieve mptcp connection sock using its token
+ * @net: restrict to this namespace
* @token: token of the mptcp connection to retrieve
*
* This function returns the mptcp connection structure with the given token.
@@ -238,7 +239,7 @@ found:
*
* returns NULL if no connection with the given token value exists.
*/
-struct mptcp_sock *mptcp_token_get_sock(u32 token)
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token)
{
struct hlist_nulls_node *pos;
struct token_bucket *bucket;
@@ -251,11 +252,15 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token)
again:
sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
msk = mptcp_sk(sk);
- if (READ_ONCE(msk->token) != token)
+ if (READ_ONCE(msk->token) != token ||
+ !net_eq(sock_net(sk), net))
continue;
+
if (!refcount_inc_not_zero(&sk->sk_refcnt))
goto not_found;
- if (READ_ONCE(msk->token) != token) {
+
+ if (READ_ONCE(msk->token) != token ||
+ !net_eq(sock_net(sk), net)) {
sock_put(sk);
goto again;
}
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index e1bd6f0a0676..5d984bec1cd8 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -11,6 +11,7 @@ static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test)
GFP_USER);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req);
mptcp_token_init_request((struct request_sock *)req);
+ sock_net_set((struct sock *)req, &init_net);
return req;
}
@@ -22,7 +23,7 @@ static void mptcp_token_test_req_basic(struct kunit *test)
KUNIT_ASSERT_EQ(test, 0,
mptcp_token_new_request((struct request_sock *)req));
KUNIT_EXPECT_NE(test, 0, (int)req->token);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, req->token));
/* cleanup */
mptcp_token_destroy_request((struct request_sock *)req);
@@ -55,6 +56,7 @@ static struct mptcp_sock *build_msk(struct kunit *test)
msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
+ sock_net_set((struct sock *)msk, &init_net);
return msk;
}
@@ -74,11 +76,11 @@ static void mptcp_token_test_msk_basic(struct kunit *test)
mptcp_token_new_connect((struct sock *)icsk));
KUNIT_EXPECT_NE(test, 0, (int)ctx->token);
KUNIT_EXPECT_EQ(test, ctx->token, msk->token);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, ctx->token));
KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt));
mptcp_token_destroy(msk);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, ctx->token));
}
static void mptcp_token_test_accept(struct kunit *test)
@@ -90,11 +92,11 @@ static void mptcp_token_test_accept(struct kunit *test)
mptcp_token_new_request((struct request_sock *)req));
msk->token = req->token;
mptcp_token_accept(req, msk);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));
/* this is now a no-op */
mptcp_token_destroy_request((struct request_sock *)req);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));
/* cleanup */
mptcp_token_destroy(msk);
@@ -116,7 +118,7 @@ static void mptcp_token_test_destroyed(struct kunit *test)
/* simulate race on removal */
refcount_set(&sk->sk_refcnt, 0);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, msk->token));
/* cleanup */
mptcp_token_destroy(msk);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 54395266339d..3646fc195e7d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -10,6 +10,17 @@ config NETFILTER_INGRESS
This allows you to classify packets from ingress using the Netfilter
infrastructure.
+config NETFILTER_EGRESS
+ bool "Netfilter egress support"
+ default y
+ select NET_EGRESS
+ help
+ This allows you to classify packets before transmission using the
+ Netfilter infrastructure.
+
+config NETFILTER_SKIP_EGRESS
+ def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB)
+
config NETFILTER_NETLINK
tristate
@@ -109,7 +120,7 @@ config NF_CONNTRACK_MARK
config NF_CONNTRACK_SECMARK
bool 'Connection tracking security mark support'
depends on NETWORK_SECMARK
- default m if NETFILTER_ADVANCED=n
+ default y if NETFILTER_ADVANCED=n
help
This option enables security markings to be applied to
connections. Typically they are copied to connections from
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 63d032191e62..6dec9cd395f1 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -317,6 +317,12 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
return &dev->nf_hooks_ingress;
}
#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (hooknum == NF_NETDEV_EGRESS) {
+ if (dev && dev_net(dev) == net)
+ return &dev->nf_hooks_egress;
+ }
+#endif
WARN_ON_ONCE(1);
return NULL;
}
@@ -335,7 +341,8 @@ static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg,
return 0;
}
-static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
+static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg,
+ int pf)
{
if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) ||
(pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS))
@@ -344,6 +351,12 @@ static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
return false;
}
+static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg,
+ int pf)
+{
+ return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS;
+}
+
static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf)
{
#ifdef CONFIG_JUMP_LABEL
@@ -383,9 +396,18 @@ static int __nf_register_net_hook(struct net *net, int pf,
switch (pf) {
case NFPROTO_NETDEV:
- err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS);
- if (err < 0)
- return err;
+#ifndef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS)
+ return -EOPNOTSUPP;
+#endif
+#ifndef CONFIG_NETFILTER_EGRESS
+ if (reg->hooknum == NF_NETDEV_EGRESS)
+ return -EOPNOTSUPP;
+#endif
+ if ((reg->hooknum != NF_NETDEV_INGRESS &&
+ reg->hooknum != NF_NETDEV_EGRESS) ||
+ !reg->dev || dev_net(reg->dev) != net)
+ return -EINVAL;
break;
case NFPROTO_INET:
if (reg->hooknum != NF_INET_INGRESS)
@@ -418,6 +440,10 @@ static int __nf_register_net_hook(struct net *net, int pf,
if (nf_ingress_hook(reg, pf))
net_inc_ingress_queue();
#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_egress_hook(reg, pf))
+ net_inc_egress_queue();
+#endif
nf_static_key_inc(reg, pf);
BUG_ON(p == new_hooks);
@@ -475,6 +501,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
if (nf_ingress_hook(reg, pf))
net_dec_ingress_queue();
#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_egress_hook(reg, pf))
+ net_dec_egress_queue();
+#endif
nf_static_key_dec(reg, pf);
} else {
WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 6186358eac7c..6e391308431d 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -130,11 +130,11 @@ htable_size(u8 hbits)
{
size_t hsize;
- /* We must fit both into u32 in jhash and size_t */
+ /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
- if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
+ if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index c100c6b112c8..2c467c422dc6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void)
int idx;
/* Compute size and mask */
+ if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
+ pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
+ ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+ }
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 128690c512df..e93c937a8bf0 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1330,12 +1330,15 @@ drop:
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
-ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
+ unsigned int hooknum = state->hook;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ int af = state->pf;
struct sock *sk;
EnterFunction(11);
@@ -1468,56 +1471,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
return NF_ACCEPT;
}
-/*
- * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
- * used only for VS/NAT.
- * Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_reply4(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-/*
- * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
- * Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_local_reply4(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-
-/*
- * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
- * used only for VS/NAT.
- * Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_reply6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-/*
- * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
- * Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_local_reply6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-#endif
-
static unsigned int
ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
struct ip_vs_proto_data *pd,
@@ -1957,8 +1910,10 @@ out:
* and send it on its way...
*/
static unsigned int
-ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
+ unsigned int hooknum = state->hook;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
@@ -1966,6 +1921,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
int ret, pkts;
int conn_reuse_mode;
struct sock *sk;
+ int af = state->pf;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -2138,55 +2094,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
}
/*
- * AF_INET handler in NF_INET_LOCAL_IN chain
- * Schedule and forward packets from remote clients
- */
-static unsigned int
-ip_vs_remote_request4(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-/*
- * AF_INET handler in NF_INET_LOCAL_OUT chain
- * Schedule and forward packets from local clients
- */
-static unsigned int
-ip_vs_local_request4(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-
-/*
- * AF_INET6 handler in NF_INET_LOCAL_IN chain
- * Schedule and forward packets from remote clients
- */
-static unsigned int
-ip_vs_remote_request6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-/*
- * AF_INET6 handler in NF_INET_LOCAL_OUT chain
- * Schedule and forward packets from local clients
- */
-static unsigned int
-ip_vs_local_request6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-#endif
-
-
-/*
* It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
* related packets destined for 0.0.0.0/0.
* When fwmark-based virtual service is used, such as transparent
@@ -2199,45 +2106,36 @@ static unsigned int
ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- int r;
struct netns_ipvs *ipvs = net_ipvs(state->net);
-
- if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
- return NF_ACCEPT;
+ int r;
/* ipvs enabled in this netns ? */
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
-}
-
+ if (state->pf == NFPROTO_IPV4) {
+ if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
+ return NF_ACCEPT;
#ifdef CONFIG_IP_VS_IPV6
-static unsigned int
-ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- int r;
- struct netns_ipvs *ipvs = net_ipvs(state->net);
- struct ip_vs_iphdr iphdr;
+ } else {
+ struct ip_vs_iphdr iphdr;
- ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
- if (iphdr.protocol != IPPROTO_ICMPV6)
- return NF_ACCEPT;
+ ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
- /* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
- return NF_ACCEPT;
+ if (iphdr.protocol != IPPROTO_ICMPV6)
+ return NF_ACCEPT;
- return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
-}
+ return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
#endif
+ }
+ return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
+}
static const struct nf_hook_ops ip_vs_ops4[] = {
/* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_reply4,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 2,
@@ -2246,21 +2144,21 @@ static const struct nf_hook_ops ip_vs_ops4[] = {
* or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */
{
- .hook = ip_vs_remote_request4,
+ .hook = ip_vs_in_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 1,
},
/* Before ip_vs_in, change source only for VS/NAT */
{
- .hook = ip_vs_local_reply4,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 1,
},
/* After mangle, schedule and forward local requests */
{
- .hook = ip_vs_local_request4,
+ .hook = ip_vs_in_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 2,
@@ -2275,7 +2173,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = {
},
/* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_reply4,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = 100,
@@ -2286,7 +2184,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = {
static const struct nf_hook_ops ip_vs_ops6[] = {
/* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_reply6,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 2,
@@ -2295,21 +2193,21 @@ static const struct nf_hook_ops ip_vs_ops6[] = {
* or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */
{
- .hook = ip_vs_remote_request6,
+ .hook = ip_vs_in_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 1,
},
/* Before ip_vs_in, change source only for VS/NAT */
{
- .hook = ip_vs_local_reply6,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 1,
},
/* After mangle, schedule and forward local requests */
{
- .hook = ip_vs_local_request6,
+ .hook = ip_vs_in_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 2,
@@ -2317,14 +2215,14 @@ static const struct nf_hook_ops ip_vs_ops6[] = {
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
- .hook = ip_vs_forward_icmp_v6,
+ .hook = ip_vs_forward_icmp,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_reply6,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 100,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c25097092a06..39c523bd775c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -24,7 +24,6 @@
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
-#include <linux/swap.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -48,6 +47,8 @@
#include <net/ip_vs.h>
+MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME);
+
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);
@@ -2017,6 +2018,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "run_estimation",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -4090,6 +4097,13 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+ ipvs->sysctl_run_estimation = 1;
+ tbl[idx++].data = &ipvs->sysctl_run_estimation;
+#ifdef CONFIG_IP_VS_DEBUG
+ /* Global sysctls must be ro in non-init netns */
+ if (!net_eq(net, &init_net))
+ tbl[idx++].mode = 0444;
+#endif
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 05b8112ffb37..9a1a7af6a186 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -100,6 +100,9 @@ static void estimation_timer(struct timer_list *t)
u64 rate;
struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
+ if (!sysctl_run_estimation(ipvs))
+ goto skip;
+
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
@@ -131,6 +134,8 @@ static void estimation_timer(struct timer_list *t)
spin_unlock(&s->lock);
}
spin_unlock(&ipvs->est_lock);
+
+skip:
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 94e18fb9690d..770a63103c7a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -74,10 +74,14 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
#define GC_SCAN_INTERVAL (120u * HZ)
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
-#define MAX_CHAINLEN 64u
+#define MIN_CHAINLEN 8u
+#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
static struct conntrack_gc_work conntrack_gc_work;
@@ -188,11 +192,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
const struct net *net)
{
struct {
struct nf_conntrack_man src;
union nf_inet_addr dst_addr;
+ unsigned int zone;
u32 net_mix;
u16 dport;
u16 proto;
@@ -205,6 +211,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
/* The direction must be ignored, so handle usable members manually. */
combined.src = tuple->src;
combined.dst_addr = tuple->dst.u3;
+ combined.zone = zoneid;
combined.net_mix = net_hash_mix(net);
combined.dport = (__force __u16)tuple->dst.u.all;
combined.proto = tuple->dst.protonum;
@@ -219,15 +226,17 @@ static u32 scale_hash(u32 hash)
static u32 __hash_conntrack(const struct net *net,
const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
unsigned int size)
{
- return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
+ return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
}
static u32 hash_conntrack(const struct net *net,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid)
{
- return scale_hash(hash_conntrack_raw(tuple, net));
+ return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
}
static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
@@ -650,9 +659,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
clean_from_lists(ct);
@@ -819,8 +830,20 @@ struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
- return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple, net));
+ unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ struct nf_conntrack_tuple_hash *thash;
+
+ thash = __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, zone_id, net));
+
+ if (thash)
+ return thash;
+
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (rid != zone_id)
+ return __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, rid, net));
+ return thash;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -842,6 +865,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
+ unsigned int max_chainlen;
unsigned int chainlen = 0;
unsigned int sequence;
int err = -EEXIST;
@@ -852,18 +876,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -873,7 +901,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -1103,8 +1131,8 @@ drop:
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
+ unsigned int chainlen = 0, sequence, max_chainlen;
const struct nf_conntrack_zone *zone;
- unsigned int chainlen = 0, sequence;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -1133,8 +1161,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
hash = scale_hash(hash);
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
@@ -1168,6 +1196,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto dying;
}
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
@@ -1175,7 +1204,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -1184,7 +1213,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN) {
+ if (chainlen++ > max_chainlen) {
chaintoolong:
nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, chaintoolong);
@@ -1246,7 +1275,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
rcu_read_lock();
begin:
nf_conntrack_get_ht(&ct_hash, &hsize);
- hash = __hash_conntrack(net, tuple, hsize);
+ hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1687,8 +1716,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
+ u32 hash, zone_id, rid;
struct nf_conn *ct;
- u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, state->pf, protonum, state->net,
@@ -1699,8 +1728,20 @@ resolve_normal_ct(struct nf_conn *tmpl,
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
- hash = hash_conntrack_raw(&tuple, state->net);
+
+ zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ hash = hash_conntrack_raw(&tuple, zone_id, state->net);
h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+ if (!h) {
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (zone_id != rid) {
+ u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+ h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+ }
+ }
+
if (!h) {
h = init_conntrack(state->net, tmpl, &tuple,
skb, dataoff, hash);
@@ -2225,28 +2266,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
spinlock_t *lockp;
for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+ struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+ if (hlist_nulls_empty(hslot))
+ continue;
+
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
nf_conntrack_lock(lockp);
- if (*bucket < nf_conntrack_htable_size) {
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
- continue;
- /* All nf_conn objects are added to hash table twice, one
- * for original direction tuple, once for the reply tuple.
- *
- * Exception: In the IPS_NAT_CLASH case, only the reply
- * tuple is added (the original tuple already existed for
- * a different object).
- *
- * We only need to call the iterator once for each
- * conntrack, so we just use the 'reply' direction
- * tuple while iterating.
- */
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
- goto found;
- }
+ hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+ continue;
+ /* All nf_conn objects are added to hash table twice, one
+ * for original direction tuple, once for the reply tuple.
+ *
+ * Exception: In the IPS_NAT_CLASH case, only the reply
+ * tuple is added (the original tuple already existed for
+ * a different object).
+ *
+ * We only need to call the iterator once for each
+ * conntrack, so we just use the 'reply' direction
+ * tuple while iterating.
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
}
spin_unlock(lockp);
local_bh_enable();
@@ -2264,26 +2308,20 @@ found:
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
{
- unsigned int bucket = 0, sequence;
+ unsigned int bucket = 0;
struct nf_conn *ct;
might_sleep();
- for (;;) {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
-
- while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
- /* Time to push up daises... */
+ mutex_lock(&nf_conntrack_mutex);
+ while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+ /* Time to push up daises... */
- nf_ct_delete(ct, portid, report);
- nf_ct_put(ct);
- cond_resched();
- }
-
- if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
- break;
- bucket = 0;
+ nf_ct_delete(ct, portid, report);
+ nf_ct_put(ct);
+ cond_resched();
}
+ mutex_unlock(&nf_conntrack_mutex);
}
struct iter_data {
@@ -2519,8 +2557,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
if (!hash)
return -ENOMEM;
+ mutex_lock(&nf_conntrack_mutex);
old_size = nf_conntrack_htable_size;
if (old_size == hashsize) {
+ mutex_unlock(&nf_conntrack_mutex);
kvfree(hash);
return 0;
}
@@ -2537,12 +2577,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
for (i = 0; i < nf_conntrack_htable_size; i++) {
while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+ unsigned int zone_id;
+
h = hlist_nulls_entry(nf_conntrack_hash[i].first,
struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
+
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
bucket = __hash_conntrack(nf_ct_net(ct),
- &h->tuple, hashsize);
+ &h->tuple, zone_id, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
@@ -2556,6 +2600,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
nf_conntrack_all_unlock();
local_bh_enable();
+ mutex_unlock(&nf_conntrack_mutex);
+
synchronize_net();
kvfree(old_hash);
return 0;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 8f7a9837349c..d1f2d3c8d2b1 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -155,6 +155,16 @@ unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
}
EXPORT_SYMBOL_GPL(nf_confirm);
+static bool in_vrf_postrouting(const struct nf_hook_state *state)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (state->hook == NF_INET_POST_ROUTING &&
+ netif_is_l3_master(state->out))
+ return true;
+#endif
+ return false;
+}
+
static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -166,6 +176,9 @@ static unsigned int ipv4_confirm(void *priv,
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return nf_conntrack_confirm(skb);
+ if (in_vrf_postrouting(state))
+ return NF_ACCEPT;
+
return nf_confirm(skb,
skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo);
@@ -374,6 +387,9 @@ static unsigned int ipv6_confirm(void *priv,
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return nf_conntrack_confirm(skb);
+ if (in_vrf_postrouting(state))
+ return NF_ACCEPT;
+
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index f8e3c0d2602f..3b516cffc779 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -104,10 +104,13 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
*/
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
unsigned long extra = timeouts[UDP_CT_UNREPLIED];
+ bool stream = false;
/* Still active after two seconds? Extend timeout. */
- if (time_after(jiffies, ct->proto.udp.stream_ts))
+ if (time_after(jiffies, ct->proto.udp.stream_ts)) {
extra = timeouts[UDP_CT_REPLIED];
+ stream = true;
+ }
nf_ct_refresh_acct(ct, ctinfo, skb, extra);
@@ -116,7 +119,7 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
return NF_ACCEPT;
/* Also, more likely to be important, and not a probe */
- if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ if (stream && !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7008961f5cb0..4d50d51db796 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -150,13 +150,16 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
/* We keep an extra hash for each conntrack, for fast searching. */
static unsigned int
-hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
struct {
struct nf_conntrack_man src;
u32 net_mix;
u32 protonum;
+ u32 zone;
} __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
@@ -165,9 +168,13 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
/* Original src, to ensure we map it consistently if poss. */
combined.src = tuple->src;
- combined.net_mix = net_hash_mix(n);
+ combined.net_mix = net_hash_mix(net);
combined.protonum = tuple->dst.protonum;
+ /* Zone ID can be used provided its valid for both directions */
+ if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
+ combined.zone = zone->id;
+
hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
return reciprocal_scale(hash, nf_nat_htable_size);
@@ -272,7 +279,7 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range2 *range)
{
- unsigned int h = hash_by_src(net, tuple);
+ unsigned int h = hash_by_src(net, zone, tuple);
const struct nf_conn *ct;
hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
@@ -619,7 +626,7 @@ nf_nat_setup_info(struct nf_conn *ct,
unsigned int srchash;
spinlock_t *lock;
- srchash = hash_by_src(net,
+ srchash = hash_by_src(net, nf_ct_zone(ct),
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
spin_lock_bh(lock);
@@ -692,6 +699,16 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(nf_nat_packet);
+static bool in_vrf_postrouting(const struct nf_hook_state *state)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (state->hook == NF_INET_POST_ROUTING &&
+ netif_is_l3_master(state->out))
+ return true;
+#endif
+ return false;
+}
+
unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -708,7 +725,7 @@ nf_nat_inet_fn(void *priv, struct sk_buff *skb,
* packet filter it out, or implement conntrack/NAT for that
* protocol. 8) --RR
*/
- if (!ct)
+ if (!ct || in_vrf_postrouting(state))
return NF_ACCEPT;
nat = nfct_nat(ct);
@@ -788,7 +805,7 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
unsigned int h;
- h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
hlist_del_rcu(&ct->nat_bysource);
spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..acd73f717a08 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -9,8 +9,19 @@
#include <net/netfilter/nf_nat_masquerade.h>
+struct masq_dev_work {
+ struct work_struct work;
+ struct net *net;
+ union nf_inet_addr addr;
+ int ifindex;
+ int (*iter)(struct nf_conn *i, void *data);
+};
+
+#define MAX_MASQ_WORKER_COUNT 16
+
static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
+static atomic_t masq_worker_count __read_mostly;
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
-static int device_cmp(struct nf_conn *i, void *ifindex)
+static void iterate_cleanup_work(struct work_struct *work)
+{
+ struct masq_dev_work *w;
+
+ w = container_of(work, struct masq_dev_work, work);
+
+ nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
+
+ put_net(w->net);
+ kfree(w);
+ atomic_dec(&masq_worker_count);
+ module_put(THIS_MODULE);
+}
+
+/* Iterate conntrack table in the background and remove conntrack entries
+ * that use the device/address being removed.
+ *
+ * In case too many work items have been queued already or memory allocation
+ * fails iteration is skipped, conntrack entries will time out eventually.
+ */
+static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
+ int ifindex,
+ int (*iter)(struct nf_conn *i, void *data),
+ gfp_t gfp_flags)
+{
+ struct masq_dev_work *w;
+
+ if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
+ return;
+
+ net = maybe_get_net(net);
+ if (!net)
+ return;
+
+ if (!try_module_get(THIS_MODULE))
+ goto err_module;
+
+ w = kzalloc(sizeof(*w), gfp_flags);
+ if (w) {
+ /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
+ atomic_inc(&masq_worker_count);
+
+ INIT_WORK(&w->work, iterate_cleanup_work);
+ w->ifindex = ifindex;
+ w->net = net;
+ w->iter = iter;
+ if (addr)
+ w->addr = *addr;
+ schedule_work(&w->work);
+ return;
+ }
+
+ module_put(THIS_MODULE);
+ err_module:
+ put_net(net);
+}
+
+static int device_cmp(struct nf_conn *i, void *arg)
{
const struct nf_conn_nat *nat = nfct_nat(i);
+ const struct masq_dev_work *w = arg;
if (!nat)
return 0;
- return nat->masq_index == (int)(long)ifindex;
+ return nat->masq_index == w->ifindex;
}
static int masq_device_event(struct notifier_block *this,
@@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this,
* and forget them.
*/
- nf_ct_iterate_cleanup_net(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
+ nf_nat_masq_schedule(net, NULL, dev->ifindex,
+ device_cmp, GFP_KERNEL);
}
return NOTIFY_DONE;
@@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this,
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
- struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct net_device *dev = ifa->ifa_dev->dev;
struct nf_conntrack_tuple *tuple;
+ struct masq_dev_work *w = ptr;
- if (!device_cmp(ct, (void *)(long)dev->ifindex))
+ if (!device_cmp(ct, ptr))
return 0;
tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- return ifa->ifa_address == tuple->dst.u3.ip;
+ return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}
static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
- struct net *net = dev_net(idev->dev);
+ const struct in_ifaddr *ifa = ptr;
+ const struct in_device *idev;
+ const struct net_device *dev;
+ union nf_inet_addr addr;
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
/* The masq_dev_notifier will catch the case of the device going
* down. So if the inetdev is dead and being destroyed we have
* no work to do. Otherwise this is an individual address removal
* and we have to perform the flush.
*/
+ idev = ifa->ifa_dev;
if (idev->dead)
return NOTIFY_DONE;
- if (event == NETDEV_DOWN)
- nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+ memset(&addr, 0, sizeof(addr));
+
+ addr.ip = ifa->ifa_address;
+
+ dev = idev->dev;
+ nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
+ inet_cmp, GFP_KERNEL);
return NOTIFY_DONE;
}
@@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = {
};
#if IS_ENABLED(CONFIG_IPV6)
-static atomic_t v6_worker_count __read_mostly;
-
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
const struct in6_addr *daddr, unsigned int srcprefs,
@@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
-struct masq_dev_work {
- struct work_struct work;
- struct net *net;
- struct in6_addr addr;
- int ifindex;
-};
-
-static int inet6_cmp(struct nf_conn *ct, void *work)
-{
- struct masq_dev_work *w = (struct masq_dev_work *)work;
- struct nf_conntrack_tuple *tuple;
-
- if (!device_cmp(ct, (void *)(long)w->ifindex))
- return 0;
-
- tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
- return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
-}
-
-static void iterate_cleanup_work(struct work_struct *work)
-{
- struct masq_dev_work *w;
-
- w = container_of(work, struct masq_dev_work, work);
-
- nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
-
- put_net(w->net);
- kfree(w);
- atomic_dec(&v6_worker_count);
- module_put(THIS_MODULE);
-}
-
/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
*
* Defer it to the system workqueue.
@@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this,
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
- struct masq_dev_work *w;
- struct net *net;
+ union nf_inet_addr addr;
- if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
+ if (event != NETDEV_DOWN)
return NOTIFY_DONE;
dev = ifa->idev->dev;
- net = maybe_get_net(dev_net(dev));
- if (!net)
- return NOTIFY_DONE;
- if (!try_module_get(THIS_MODULE))
- goto err_module;
+ memset(&addr, 0, sizeof(addr));
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (w) {
- atomic_inc(&v6_worker_count);
-
- INIT_WORK(&w->work, iterate_cleanup_work);
- w->ifindex = dev->ifindex;
- w->net = net;
- w->addr = ifa->addr;
- schedule_work(&w->work);
+ addr.in6 = ifa->addr;
- return NOTIFY_DONE;
- }
-
- module_put(THIS_MODULE);
- err_module:
- put_net(net);
+ nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
+ GFP_ATOMIC);
return NOTIFY_DONE;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 081437dd75b7..c0851fec11d4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -780,6 +780,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct nftables_pernet *nft_net;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -790,8 +791,11 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table);
+ event, flags, ctx->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -1563,6 +1567,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct nftables_pernet *nft_net;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -1573,8 +1578,11 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table,
+ event, flags, ctx->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
@@ -2866,8 +2874,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
u32 flags, int family,
const struct nft_table *table,
const struct nft_chain *chain,
- const struct nft_rule *rule,
- const struct nft_rule *prule)
+ const struct nft_rule *rule, u64 handle)
{
struct nlmsghdr *nlh;
const struct nft_expr *expr, *next;
@@ -2887,9 +2894,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
NFTA_RULE_PAD))
goto nla_put_failure;
- if (event != NFT_MSG_DELRULE && prule) {
- if (nla_put_be64(skb, NFTA_RULE_POSITION,
- cpu_to_be64(prule->handle),
+ if (event != NFT_MSG_DELRULE && handle) {
+ if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(handle),
NFTA_RULE_PAD))
goto nla_put_failure;
}
@@ -2925,7 +2931,10 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule, int event)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ const struct nft_rule *prule;
struct sk_buff *skb;
+ u64 handle = 0;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -2936,9 +2945,20 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (event == NFT_MSG_NEWRULE &&
+ !list_is_first(&rule->list, &ctx->chain->rules) &&
+ !list_is_last(&rule->list, &ctx->chain->rules)) {
+ prule = list_prev_entry(rule, list);
+ handle = prule->handle;
+ }
+ if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE))
+ flags |= NLM_F_APPEND;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table,
- ctx->chain, rule, NULL);
+ event, flags, ctx->family, ctx->table,
+ ctx->chain, rule, handle);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -2964,6 +2984,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
const struct nft_rule *rule, *prule;
unsigned int s_idx = cb->args[0];
+ u64 handle;
prule = NULL;
list_for_each_entry_rcu(rule, &chain->rules, list) {
@@ -2975,12 +2996,17 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
}
+ if (prule)
+ handle = prule->handle;
+ else
+ handle = 0;
+
if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
- table, chain, rule, prule) < 0)
+ table, chain, rule, handle) < 0)
return 1;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -3143,7 +3169,7 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
- family, table, chain, rule, NULL);
+ family, table, chain, rule, 0);
if (err < 0)
goto err_fill_rule_info;
@@ -3403,17 +3429,15 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0)
+ goto err_destroy_flow_rule;
+
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err_destroy_flow_rule;
}
- err = nft_delrule(&ctx, old_rule);
- if (err < 0) {
- nft_trans_destroy(trans);
- goto err_destroy_flow_rule;
- }
-
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
@@ -3943,8 +3967,9 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
gfp_t gfp_flags)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
- struct sk_buff *skb;
u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -3955,7 +3980,10 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
- err = nf_tables_fill_set(skb, ctx, set, event, 0);
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_set(skb, ctx, set, event, flags);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -4336,7 +4364,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (ops->privsize != NULL)
size = ops->privsize(nla, &desc);
alloc_size = sizeof(*set) + size + udlen;
- if (alloc_size < size)
+ if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
set = kvzalloc(alloc_size, GFP_KERNEL);
if (!set)
@@ -5231,12 +5259,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nft_set_elem *elem,
- int event, u16 flags)
+ int event)
{
struct nftables_pernet *nft_net;
struct net *net = ctx->net;
u32 portid = ctx->portid;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
@@ -5246,6 +5275,9 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
set, elem);
if (err < 0) {
@@ -6921,7 +6953,7 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
- int family, int report, gfp_t gfp)
+ u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb;
@@ -6946,8 +6978,9 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
if (skb == NULL)
goto err;
- err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family,
- table, obj, false);
+ err = nf_tables_fill_obj_info(skb, net, portid, seq, event,
+ flags & (NLM_F_CREATE | NLM_F_EXCL),
+ family, table, obj, false);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -6964,7 +6997,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
- ctx->family, ctx->report, GFP_KERNEL);
+ ctx->flags, ctx->family, ctx->report, GFP_KERNEL);
}
/*
@@ -7745,6 +7778,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -7755,8 +7789,11 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
- ctx->seq, event, 0,
+ ctx->seq, event, flags,
ctx->family, flowtable, hook_list);
if (err < 0) {
kfree_skb(skb);
@@ -8634,7 +8671,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_setelem_activate(net, te->set, &te->elem);
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
- NFT_MSG_NEWSETELEM, 0);
+ NFT_MSG_NEWSETELEM);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
@@ -8642,7 +8679,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
- NFT_MSG_DELSETELEM, 0);
+ NFT_MSG_DELSETELEM);
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem)) {
atomic_dec(&te->set->nelems);
@@ -9599,7 +9636,6 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
table->use--;
nf_tables_chain_destroy(&ctx);
}
- list_del(&table->list);
nf_tables_table_destroy(&ctx);
}
@@ -9612,6 +9648,8 @@ static void __nft_release_tables(struct net *net)
if (nft_table_has_owner(table))
continue;
+ list_del(&table->list);
+
__nft_release_table(net, table);
}
}
@@ -9619,31 +9657,38 @@ static void __nft_release_tables(struct net *net)
static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
+ struct nft_table *table, *to_delete[8];
struct nftables_pernet *nft_net;
struct netlink_notify *n = ptr;
- struct nft_table *table, *nt;
struct net *net = n->net;
- bool release = false;
+ unsigned int deleted;
+ bool restart = false;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
nft_net = nft_pernet(net);
+ deleted = 0;
mutex_lock(&nft_net->commit_mutex);
+again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid) {
__nft_release_hook(net, table);
- release = true;
+ list_del_rcu(&table->list);
+ to_delete[deleted++] = table;
+ if (deleted >= ARRAY_SIZE(to_delete))
+ break;
}
}
- if (release) {
+ if (deleted) {
+ restart = deleted >= ARRAY_SIZE(to_delete);
synchronize_rcu();
- list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
- if (nft_table_has_owner(table) &&
- n->portid == table->nlpid)
- __nft_release_table(net, table);
- }
+ while (deleted)
+ __nft_release_table(net, to_delete[--deleted]);
+
+ if (restart)
+ goto again;
}
mutex_unlock(&nft_net->commit_mutex);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 866cfba04d6c..adc348056076 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -79,7 +79,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
ptr = skb_network_header(skb);
else {
- if (!pkt->tprot_set)
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
return false;
ptr = skb_network_header(skb) + nft_thoff(pkt);
}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e4fe2f0780eb..84a7dea46efa 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -113,13 +113,13 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
int off = skb_network_offset(skb);
unsigned int len, nh_end;
- nh_end = pkt->tprot_set ? nft_thoff(pkt) : skb->len;
+ nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len;
len = min_t(unsigned int, nh_end - skb_network_offset(skb),
NFT_TRACETYPE_NETWORK_HSIZE);
if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
return -1;
- if (pkt->tprot_set) {
+ if (pkt->flags & NFT_PKTINFO_L4PROTO) {
len = min_t(unsigned int, skb->len - nft_thoff(pkt),
NFT_TRACETYPE_TRANSPORT_HSIZE);
if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index f554e2ea32ee..d5c719c9e36c 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -185,7 +185,7 @@ static const struct nf_hook_entries *
nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
{
const struct nf_hook_entries *hook_head = NULL;
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
struct net_device *netdev;
#endif
@@ -221,9 +221,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
break;
#endif
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
case NFPROTO_NETDEV:
- if (hook != NF_NETDEV_INGRESS)
+ if (hook >= NF_NETDEV_NUMHOOKS)
return ERR_PTR(-EOPNOTSUPP);
if (!dev)
@@ -233,7 +233,15 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
if (!netdev)
return ERR_PTR(-ENODEV);
- return rcu_dereference(netdev->nf_hooks_ingress);
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (hook == NF_NETDEV_INGRESS)
+ return rcu_dereference(netdev->nf_hooks_ingress);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (hook == NF_NETDEV_EGRESS)
+ return rcu_dereference(netdev->nf_hooks_egress);
+#endif
+ fallthrough;
#endif
default:
return ERR_PTR(-EPROTONOSUPPORT);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 4c3fbaaeb103..4acc4b8e9fe5 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -560,7 +560,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
goto nla_put_failure;
if (indev && entskb->dev &&
- entskb->mac_header != entskb->network_header) {
+ skb_mac_header_was_set(entskb)) {
struct nfqnl_msg_packet_hw phw;
int len;
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 5b02408a920b..c3563f0be269 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -310,9 +310,11 @@ static const struct nft_chain_type nft_chain_filter_netdev = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
.family = NFPROTO_NETDEV,
- .hook_mask = (1 << NF_NETDEV_INGRESS),
+ .hook_mask = (1 << NF_NETDEV_INGRESS) |
+ (1 << NF_NETDEV_EGRESS),
.hooks = {
[NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ [NF_NETDEV_EGRESS] = nft_do_chain_netdev,
},
};
@@ -342,12 +344,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
return;
}
- /* UNREGISTER events are also happening on netns exit.
- *
- * Although nf_tables core releases all tables/chains, only this event
- * handler provides guarantee that hook->ops.dev is still accessible,
- * so we cannot skip exiting net namespaces.
- */
__nft_release_basechain(ctx);
}
@@ -366,6 +362,9 @@ static int nf_tables_netdev_event(struct notifier_block *this,
event != NETDEV_CHANGENAME)
return NOTIFY_DONE;
+ if (!check_net(ctx.net))
+ return NOTIFY_DONE;
+
nft_net = nft_pernet(ctx.net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 272bcdb1392d..f69cc73c5813 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -19,6 +19,7 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
/* Used for matches where *info is larger than X byte */
#define NFT_MATCH_LARGE_THRESH 192
@@ -257,8 +258,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_compat_wait_for_destructors();
ret = xt_check_target(&par, size, proto, inv);
- if (ret < 0)
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ const char *modname = NULL;
+
+ if (strcmp(target->name, "LOG") == 0)
+ modname = "nf_log_syslog";
+ else if (strcmp(target->name, "NFLOG") == 0)
+ modname = "nfnetlink_log";
+
+ if (modname &&
+ nft_request_module(ctx->net, "%s", modname) == -EAGAIN)
+ return -EAGAIN;
+ }
+
return ret;
+ }
/* The standard target cannot be used */
if (!target->target)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 6ba3256fa844..87f3af4645d9 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -198,17 +198,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return -EBUSY;
priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
- switch (priv->op) {
- case NFT_DYNSET_OP_ADD:
- case NFT_DYNSET_OP_DELETE:
- break;
- case NFT_DYNSET_OP_UPDATE:
- if (!(set->flags & NFT_SET_TIMEOUT))
- return -EOPNOTSUPP;
- break;
- default:
+ if (priv->op > NFT_DYNSET_OP_DELETE)
return -EOPNOTSUPP;
- }
timeout = 0;
if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index a7e01e9952f1..fe91ff5f8fbe 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -244,7 +244,11 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest,
case NFT_META_OIF:
nft_meta_store_ifindex(dest, nft_out(pkt));
break;
- case NFT_META_IIFTYPE:
+ case NFT_META_IFTYPE:
+ if (!nft_meta_store_iftype(dest, pkt->skb->dev))
+ return false;
+ break;
+ case __NFT_META_IIFTYPE:
if (!nft_meta_store_iftype(dest, nft_in(pkt)))
return false;
break;
@@ -329,7 +333,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
nft_reg_store8(dest, nft_pf(pkt));
break;
case NFT_META_L4PROTO:
- if (!pkt->tprot_set)
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
goto err;
nft_reg_store8(dest, pkt->tprot);
break;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index a44b14f6c0dc..cbfe4e4a4ad7 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -22,6 +22,7 @@
#include <linux/icmpv6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/ip.h>
#include <net/sctp/checksum.h>
static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
@@ -79,6 +80,45 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
}
+static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
+{
+ unsigned int thoff = nft_thoff(pkt);
+
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ return -1;
+
+ switch (pkt->tprot) {
+ case IPPROTO_UDP:
+ pkt->inneroff = thoff + sizeof(struct udphdr);
+ break;
+ case IPPROTO_TCP: {
+ struct tcphdr *th, _tcph;
+
+ th = skb_header_pointer(pkt->skb, thoff, sizeof(_tcph), &_tcph);
+ if (!th)
+ return -1;
+
+ pkt->inneroff = thoff + __tcp_hdrlen(th);
+ }
+ break;
+ default:
+ return -1;
+ }
+
+ pkt->flags |= NFT_PKTINFO_INNER;
+
+ return 0;
+}
+
+static int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
+{
+ if (!(pkt->flags & NFT_PKTINFO_INNER) &&
+ __nft_payload_inner_offset((struct nft_pktinfo *)pkt) < 0)
+ return -1;
+
+ return pkt->inneroff;
+}
+
void nft_payload_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -108,10 +148,15 @@ void nft_payload_eval(const struct nft_expr *expr,
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
- if (!pkt->tprot_set)
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
goto err;
offset = nft_thoff(pkt);
break;
+ case NFT_PAYLOAD_INNER_HEADER:
+ offset = nft_payload_inner_offset(pkt);
+ if (offset < 0)
+ goto err;
+ break;
default:
BUG();
}
@@ -610,10 +655,15 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
- if (!pkt->tprot_set)
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
goto err;
offset = nft_thoff(pkt);
break;
+ case NFT_PAYLOAD_INNER_HEADER:
+ offset = nft_payload_inner_offset(pkt);
+ if (offset < 0)
+ goto err;
+ break;
default:
BUG();
}
@@ -622,7 +672,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
offset += priv->offset;
if ((priv->csum_type == NFT_PAYLOAD_CSUM_INET || priv->csum_flags) &&
- (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER ||
+ ((priv->base != NFT_PAYLOAD_TRANSPORT_HEADER &&
+ priv->base != NFT_PAYLOAD_INNER_HEADER) ||
skb->ip_summed != CHECKSUM_PARTIAL)) {
fsum = skb_checksum(skb, offset, priv->len, 0);
tsum = csum_partial(src, priv->len, 0);
@@ -741,6 +792,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
case NFT_PAYLOAD_LL_HEADER:
case NFT_PAYLOAD_NETWORK_HEADER:
case NFT_PAYLOAD_TRANSPORT_HEADER:
+ case NFT_PAYLOAD_INNER_HEADER:
break;
default:
return ERR_PTR(-EOPNOTSUPP);
@@ -759,7 +811,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
- base != NFT_PAYLOAD_LL_HEADER)
+ base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER)
return &nft_payload_fast_ops;
else
return &nft_payload_ops;
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0363f533a42b..c4d1389f7185 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -60,7 +60,7 @@ static void nft_quota_obj_eval(struct nft_object *obj,
if (overquota &&
!test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
- NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
+ NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
}
static int nft_quota_do_init(const struct nlattr * const tb[],
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 7b2f359bfce4..2f7cf5ecebf4 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -137,7 +137,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
{
int ret;
- info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
+ info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL);
if (!info->timer) {
ret = -ENOMEM;
goto out;
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index 2ff75f7637b0..f39244f9c0ed 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -44,6 +44,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int log_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
+ int ret;
if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6)
return -EINVAL;
@@ -58,7 +59,14 @@ static int log_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ if (ret != 0 && !par->nft_compat) {
+ request_module("%s", "nf_log_syslog");
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ }
+
+ return ret;
}
static void log_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index fb5793208059..e660c3710a10 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -42,13 +42,21 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int nflog_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
+ int ret;
if (info->flags & ~XT_NFLOG_MASK)
return -EINVAL;
if (info->prefix[sizeof(info->prefix) - 1] != '\0')
return -EINVAL;
- return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ if (ret != 0 && !par->nft_compat) {
+ request_module("%s", "nfnetlink_log");
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ }
+
+ return ret;
}
static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 0d5c422f8745..8aec1b529364 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -94,11 +94,11 @@ static unsigned int
xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_rateest_target_info *info = par->targinfo;
- struct gnet_stats_basic_packed *stats = &info->est->bstats;
+ struct gnet_stats_basic_sync *stats = &info->est->bstats;
spin_lock_bh(&info->est->lock);
- stats->bytes += skb->len;
- stats->packets++;
+ u64_stats_add(&stats->bytes, skb->len);
+ u64_stats_inc(&stats->packets);
spin_unlock_bh(&info->est->lock);
return XT_CONTINUE;
@@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
if (!est)
goto err1;
+ gnet_stats_basic_sync_init(&est->bstats);
strlcpy(est->name, info->name, sizeof(est->name));
spin_lock_init(&est->lock);
est->refcnt = 1;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 24b7cf447bc5..4c575324a985 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -594,7 +594,10 @@ static int netlink_insert(struct sock *sk, u32 portid)
/* We need to ensure that the socket is hashed and visible. */
smp_wmb();
- nlk_sk(sk)->bound = portid;
+ /* Paired with lockless reads from netlink_bind(),
+ * netlink_connect() and netlink_sendmsg().
+ */
+ WRITE_ONCE(nlk_sk(sk)->bound, portid);
err:
release_sock(sk);
@@ -1012,7 +1015,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->ngroups < BITS_PER_LONG)
groups &= (1UL << nlk->ngroups) - 1;
- bound = nlk->bound;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ bound = READ_ONCE(nlk->bound);
if (bound) {
/* Ensure nlk->portid is up-to-date. */
smp_rmb();
@@ -1098,8 +1102,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
/* No need for barriers here as we return to user-space without
* using any of the bound attributes.
+ * Paired with WRITE_ONCE() in netlink_insert().
*/
- if (!nlk->bound)
+ if (!READ_ONCE(nlk->bound))
err = netlink_autobind(sock);
if (err == 0) {
@@ -1407,8 +1412,6 @@ struct netlink_broadcast_data {
int delivered;
gfp_t allocation;
struct sk_buff *skb, *skb2;
- int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
- void *tx_data;
};
static void do_one_broadcast(struct sock *sk,
@@ -1462,11 +1465,6 @@ static void do_one_broadcast(struct sock *sk,
p->delivery_failure = 1;
goto out;
}
- if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
- kfree_skb(p->skb2);
- p->skb2 = NULL;
- goto out;
- }
if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
@@ -1489,10 +1487,8 @@ out:
sock_put(sk);
}
-int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
- u32 group, gfp_t allocation,
- int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
- void *filter_data)
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
+ u32 group, gfp_t allocation)
{
struct net *net = sock_net(ssk);
struct netlink_broadcast_data info;
@@ -1511,8 +1507,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
info.allocation = allocation;
info.skb = skb;
info.skb2 = NULL;
- info.tx_filter = filter;
- info.tx_data = filter_data;
/* While we sleep in clone, do not allow to change socket list */
@@ -1538,14 +1532,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
}
return -ESRCH;
}
-EXPORT_SYMBOL(netlink_broadcast_filtered);
-
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
- u32 group, gfp_t allocation)
-{
- return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
- NULL, NULL);
-}
EXPORT_SYMBOL(netlink_broadcast);
struct netlink_set_err_data {
@@ -1888,7 +1874,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
dst_group = nlk->dst_group;
}
- if (!nlk->bound) {
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ if (!READ_ONCE(nlk->bound)) {
err = netlink_autobind(sock);
if (err)
goto out;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 6d16e1ab1a8a..775064cdd0ee 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -633,7 +633,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr;
- ax25_address *source = NULL;
+ const ax25_address *source = NULL;
ax25_uid_assoc *user;
struct net_device *dev;
int err = 0;
@@ -673,7 +673,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
err = -ENETUNREACH;
goto out_release;
}
- source = (ax25_address *)dev->dev_addr;
+ source = (const ax25_address *)dev->dev_addr;
user = ax25_findbyuid(current_euid());
if (user) {
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 29e418c8c6c3..3aaac4a22b38 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -108,10 +108,10 @@ static int __must_check nr_set_mac_address(struct net_device *dev, void *addr)
if (err)
return err;
- ax25_listen_release((ax25_address *)dev->dev_addr, NULL);
+ ax25_listen_release((const ax25_address *)dev->dev_addr, NULL);
}
- memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
+ dev_addr_set(dev, sa->sa_data);
return 0;
}
@@ -120,7 +120,7 @@ static int nr_open(struct net_device *dev)
{
int err;
- err = ax25_listen_register((ax25_address *)dev->dev_addr, NULL);
+ err = ax25_listen_register((const ax25_address *)dev->dev_addr, NULL);
if (err)
return err;
@@ -131,7 +131,7 @@ static int nr_open(struct net_device *dev)
static int nr_close(struct net_device *dev)
{
- ax25_listen_release((ax25_address *)dev->dev_addr, NULL);
+ ax25_listen_release((const ax25_address *)dev->dev_addr, NULL);
netif_stop_queue(dev);
return 0;
}
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index ddd5cbd455e3..baea3cbd76ca 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -598,7 +598,7 @@ struct net_device *nr_dev_get(ax25_address *addr)
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM &&
- ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) {
+ ax25cmp(addr, (const ax25_address *)dev->dev_addr) == 0) {
dev_hold(dev);
goto out;
}
@@ -825,7 +825,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
ax25s = nr_neigh->ax25;
nr_neigh->ax25 = ax25_send_frame(skb, 256,
- (ax25_address *)dev->dev_addr,
+ (const ax25_address *)dev->dev_addr,
&nr_neigh->callsign,
nr_neigh->digipeat, nr_neigh->dev);
if (ax25s)
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 6024fad905ff..dda323e0a473 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -60,6 +60,9 @@ int nfc_proto_register(const struct nfc_protocol *nfc_proto)
proto_tab[nfc_proto->id] = nfc_proto;
write_unlock(&proto_tab_lock);
+ if (rc)
+ proto_unregister(nfc_proto->proto);
+
return rc;
}
EXPORT_SYMBOL(nfc_proto_register);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index fefc03674f4f..d63d2e5dc60c 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -277,6 +277,7 @@ int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param)
static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
{
struct digital_tg_mdaa_params *params;
+ int rc;
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params)
@@ -291,8 +292,12 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2);
params->sc = DIGITAL_SENSF_FELICA_SC;
- return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
- 500, digital_tg_recv_atr_req, NULL);
+ rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
+ 500, digital_tg_recv_atr_req, NULL);
+ if (rc)
+ kfree(params);
+
+ return rc;
}
static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech)
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 84d2345c75a3..3adf4589852a 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -465,8 +465,12 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev,
skb_put_u8(skb, sel_cmd);
skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR);
- return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
- target);
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
+ target);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
}
static void digital_in_recv_sens_res(struct nfc_digital_dev *ddev, void *arg,
diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index 3a89bd9b89fc..af6bacb3ba98 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c
@@ -114,8 +114,6 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
{
u8 pipe;
- pr_debug("\n");
-
pipe = hdev->gate2pipe[gate];
if (pipe == NFC_HCI_INVALID_PIPE)
return -EADDRNOTAVAIL;
@@ -130,8 +128,6 @@ int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
{
u8 pipe;
- pr_debug("\n");
-
pipe = hdev->gate2pipe[gate];
if (pipe == NFC_HCI_INVALID_PIPE)
return -EADDRNOTAVAIL;
@@ -205,8 +201,6 @@ static int nfc_hci_open_pipe(struct nfc_hci_dev *hdev, u8 pipe)
static int nfc_hci_close_pipe(struct nfc_hci_dev *hdev, u8 pipe)
{
- pr_debug("\n");
-
return nfc_hci_execute_cmd(hdev, pipe, NFC_HCI_ANY_CLOSE_PIPE,
NULL, 0, NULL);
}
@@ -242,8 +236,6 @@ static u8 nfc_hci_create_pipe(struct nfc_hci_dev *hdev, u8 dest_host,
static int nfc_hci_delete_pipe(struct nfc_hci_dev *hdev, u8 pipe)
{
- pr_debug("\n");
-
return nfc_hci_execute_cmd(hdev, NFC_HCI_ADMIN_PIPE,
NFC_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL);
}
@@ -256,8 +248,6 @@ static int nfc_hci_clear_all_pipes(struct nfc_hci_dev *hdev)
/* TODO: Find out what the identity reference data is
* and fill param with it. HCI spec 6.1.3.5 */
- pr_debug("\n");
-
if (test_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &hdev->quirks))
param_len = 0;
@@ -271,8 +261,6 @@ int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate)
int r;
u8 pipe = hdev->gate2pipe[gate];
- pr_debug("\n");
-
if (pipe == NFC_HCI_INVALID_PIPE)
return -EADDRNOTAVAIL;
@@ -296,8 +284,6 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev)
{
int r;
- pr_debug("\n");
-
r = nfc_hci_clear_all_pipes(hdev);
if (r < 0)
return r;
@@ -314,8 +300,6 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate,
bool pipe_created = false;
int r;
- pr_debug("\n");
-
if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE)
return 0;
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index aef750d7787c..e90f70385813 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -201,8 +201,7 @@ static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr)
del_timer_sync(&shdlc->t2_timer);
shdlc->t2_active = false;
- pr_debug
- ("All sent frames acked. Stopped T2(retransmit)\n");
+ pr_debug("All sent frames acked. Stopped T2(retransmit)\n");
}
} else {
skb = skb_peek(&shdlc->ack_pending_q);
@@ -211,8 +210,7 @@ static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr)
msecs_to_jiffies(SHDLC_T2_VALUE_MS));
shdlc->t2_active = true;
- pr_debug
- ("Start T2(retransmit) for remaining unacked sent frames\n");
+ pr_debug("Start T2(retransmit) for remaining unacked sent frames\n");
}
}
@@ -365,8 +363,6 @@ static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
- pr_debug("\n");
-
skb = llc_shdlc_alloc_skb(shdlc, 2);
if (skb == NULL)
return -ENOMEM;
@@ -381,8 +377,6 @@ static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
- pr_debug("\n");
-
skb = llc_shdlc_alloc_skb(shdlc, 0);
if (skb == NULL)
return -ENOMEM;
@@ -522,12 +516,11 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
unsigned long time_sent;
if (shdlc->send_q.qlen)
- pr_debug
- ("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n",
- shdlc->send_q.qlen, shdlc->ns, shdlc->dnr,
- shdlc->rnr == false ? "false" : "true",
- shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr),
- shdlc->ack_pending_q.qlen);
+ pr_debug("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n",
+ shdlc->send_q.qlen, shdlc->ns, shdlc->dnr,
+ shdlc->rnr == false ? "false" : "true",
+ shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr),
+ shdlc->ack_pending_q.qlen);
while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w &&
(shdlc->rnr == false)) {
@@ -573,8 +566,6 @@ static void llc_shdlc_connect_timeout(struct timer_list *t)
{
struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer);
- pr_debug("\n");
-
schedule_work(&shdlc->sm_work);
}
@@ -601,8 +592,6 @@ static void llc_shdlc_sm_work(struct work_struct *work)
struct llc_shdlc *shdlc = container_of(work, struct llc_shdlc, sm_work);
int r;
- pr_debug("\n");
-
mutex_lock(&shdlc->state_mutex);
switch (shdlc->state) {
@@ -649,8 +638,7 @@ static void llc_shdlc_sm_work(struct work_struct *work)
llc_shdlc_handle_send_queue(shdlc);
if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) {
- pr_debug
- ("Handle T1(send ack) elapsed (T1 now inactive)\n");
+ pr_debug("Handle T1(send ack) elapsed (T1 now inactive)\n");
shdlc->t1_active = false;
r = llc_shdlc_send_s_frame(shdlc, S_FRAME_RR,
@@ -660,8 +648,7 @@ static void llc_shdlc_sm_work(struct work_struct *work)
}
if (shdlc->t2_active && timer_pending(&shdlc->t2_timer) == 0) {
- pr_debug
- ("Handle T2(retransmit) elapsed (T2 inactive)\n");
+ pr_debug("Handle T2(retransmit) elapsed (T2 inactive)\n");
shdlc->t2_active = false;
@@ -686,8 +673,6 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc)
{
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq);
- pr_debug("\n");
-
mutex_lock(&shdlc->state_mutex);
shdlc->state = SHDLC_CONNECTING;
@@ -706,8 +691,6 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc)
static void llc_shdlc_disconnect(struct llc_shdlc *shdlc)
{
- pr_debug("\n");
-
mutex_lock(&shdlc->state_mutex);
shdlc->state = SHDLC_DISCONNECTED;
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 3c4172a5aeb5..41e3a20c8935 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -337,8 +337,6 @@ int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock)
struct nfc_dev *dev;
struct nfc_llcp_local *local;
- pr_debug("Sending DISC\n");
-
local = sock->local;
if (local == NULL)
return -ENODEV;
@@ -362,8 +360,6 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
struct nfc_llcp_local *local;
u16 size = 0;
- pr_debug("Sending SYMM\n");
-
local = nfc_llcp_find_local(dev);
if (local == NULL)
return -ENODEV;
@@ -399,8 +395,6 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
u16 size = 0;
__be16 miux;
- pr_debug("Sending CONNECT\n");
-
local = sock->local;
if (local == NULL)
return -ENODEV;
@@ -475,8 +469,6 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
u16 size = 0;
__be16 miux;
- pr_debug("Sending CC\n");
-
local = sock->local;
if (local == NULL)
return -ENODEV;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index eaeb2b1cfa6a..5ad5157aa9c5 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -45,8 +45,6 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)
struct nfc_llcp_local *local = sock->local;
struct sk_buff *s, *tmp;
- pr_debug("%p\n", &sock->sk);
-
skb_queue_purge(&sock->tx_queue);
skb_queue_purge(&sock->tx_pending_queue);
@@ -1505,9 +1503,8 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err)
{
struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
- pr_debug("Received an LLCP PDU\n");
if (err < 0) {
- pr_err("err %d\n", err);
+ pr_err("LLCP PDU receive err %d\n", err);
return;
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 82ab39d80726..6fd873aa86be 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -930,8 +930,6 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
- pr_debug("entry\n");
-
if (!ndev->target_active_prot) {
pr_err("unable to deactivate target, no active target\n");
return;
@@ -977,8 +975,6 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev)
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
int rc;
- pr_debug("entry\n");
-
if (nfc_dev->rf_mode == NFC_RF_INITIATOR) {
nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE);
} else {
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index e199912ee1e5..19703a649b5a 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -432,8 +432,6 @@ void nci_hci_data_received_cb(void *context,
struct sk_buff *frag_skb;
int msg_len;
- pr_debug("\n");
-
if (err) {
nci_req_complete(ndev, err);
return;
@@ -547,8 +545,6 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host,
static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe)
{
- pr_debug("\n");
-
return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE,
NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL);
}
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index c5eacaac41ae..282c51051dcc 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -738,8 +738,6 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
const struct nci_nfcee_discover_ntf *nfcee_ntf =
(struct nci_nfcee_discover_ntf *)skb->data;
- pr_debug("\n");
-
/* NFCForum NCI 9.2.1 HCI Network Specific Handling
* If the NFCC supports the HCI Network, it SHALL return one,
* and only one, NFCEE_DISCOVER_NTF with a Protocol type of
@@ -751,12 +749,6 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
nci_req_complete(ndev, status);
}
-static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev,
- const struct sk_buff *skb)
-{
- pr_debug("\n");
-}
-
void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
{
__u16 ntf_opcode = nci_opcode(skb->data);
@@ -813,7 +805,6 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
break;
case NCI_OP_RF_NFCEE_ACTION_NTF:
- nci_nfcee_action_ntf_packet(ndev, skb);
break;
default:
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index a2e72c003805..b911ab78bed9 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -334,6 +334,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
ndev->cur_conn_id);
if (conn_info) {
list_del(&conn_info->list);
+ if (conn_info == ndev->rf_conn_info)
+ ndev->rf_conn_info = NULL;
devm_kfree(&ndev->nfc_dev->dev, conn_info);
}
}
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 502e7a3f8948..c027c76d493c 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2015, Marvell International Ltd.
*
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License"). You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED. The License provides additional details about
- * this warranty disclaimer.
- */
-
-/* Inspired (hugely) by HCI LDISC implementation in Bluetooth.
+ * Inspired (hugely) by HCI LDISC implementation in Bluetooth.
*
* Copyright (C) 2000-2001 Qualcomm Incorporated
* Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com>
@@ -349,7 +337,7 @@ static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file,
return -EBUSY;
break;
default:
- err = n_tty_ioctl_helper(tty, file, cmd, arg);
+ err = n_tty_ioctl_helper(tty, cmd, arg);
break;
}
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 49089c50872e..334f63c9529e 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1664,31 +1664,37 @@ static const struct genl_ops nfc_genl_ops[] = {
.cmd = NFC_CMD_DEV_UP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_dev_up,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_DEV_DOWN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_dev_down,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_START_POLL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_start_poll,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_STOP_POLL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_stop_poll,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_DEP_LINK_UP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_dep_link_up,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_DEP_LINK_DOWN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_dep_link_down,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_GET_TARGET,
@@ -1706,26 +1712,31 @@ static const struct genl_ops nfc_genl_ops[] = {
.cmd = NFC_CMD_LLC_SET_PARAMS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_llc_set_params,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_LLC_SDREQ,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_llc_sdreq,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_FW_DOWNLOAD,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_fw_download,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_ENABLE_SE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_enable_se,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_DISABLE_SE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_disable_se,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_GET_SE,
@@ -1737,21 +1748,25 @@ static const struct genl_ops nfc_genl_ops[] = {
.cmd = NFC_CMD_SE_IO,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_se_io,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_ACTIVATE_TARGET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_activate_target,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_VENDOR,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_vendor_cmd,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NFC_CMD_DEACTIVATE_TARGET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nfc_genl_deactivate_target,
+ .flags = GENL_ADMIN_PERM,
},
};
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 896b8f5bc885..04a060ac7fdf 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -12,7 +12,6 @@
#include <linux/openvswitch.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
-#include <linux/swap.h>
#include <net/netlink.h>
#include <net/genetlink.h>
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 543365f58e97..46943a18a10d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -46,6 +46,8 @@
* Copyright (C) 2011, <lokec@ccs.neu.edu>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/ethtool.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -89,6 +91,7 @@
#endif
#include <linux/bpf.h>
#include <net/compat.h>
+#include <linux/netfilter_netdev.h>
#include "internal.h"
@@ -239,8 +242,42 @@ struct packet_skb_cb {
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
+#ifdef CONFIG_NETFILTER_EGRESS
+static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb)
+{
+ struct sk_buff *next, *head = NULL, *tail;
+ int rc;
+
+ rcu_read_lock();
+ for (; skb != NULL; skb = next) {
+ next = skb->next;
+ skb_mark_not_on_list(skb);
+
+ if (!nf_hook_egress(skb, &rc, skb->dev))
+ continue;
+
+ if (!head)
+ head = skb;
+ else
+ tail->next = skb;
+
+ tail = skb;
+ }
+ rcu_read_unlock();
+
+ return head;
+}
+#endif
+
static int packet_direct_xmit(struct sk_buff *skb)
{
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_hook_egress_active()) {
+ skb = nf_hook_direct_egress(skb);
+ if (!skb)
+ return NET_XMIT_DROP;
+ }
+#endif
return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
}
diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
index 1b1411d158a7..8e0605f88a73 100644
--- a/net/qrtr/Makefile
+++ b/net/qrtr/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_QRTR) := qrtr.o ns.o
+obj-$(CONFIG_QRTR) += qrtr.o
+qrtr-y := af_qrtr.o ns.o
obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
qrtr-smd-y := smd.o
diff --git a/net/qrtr/qrtr.c b/net/qrtr/af_qrtr.c
index ec2322529727..ec2322529727 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/af_qrtr.c
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index cf7d974e0f61..30a1cf4c16c6 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -109,7 +109,7 @@ char *rose2asc(char *buf, const rose_address *addr)
/*
* Compare two ROSE addresses, 0 == equal.
*/
-int rosecmp(rose_address *addr1, rose_address *addr2)
+int rosecmp(const rose_address *addr1, const rose_address *addr2)
{
int i;
@@ -123,7 +123,8 @@ int rosecmp(rose_address *addr1, rose_address *addr2)
/*
* Compare two ROSE addresses for only mask digits, 0 == equal.
*/
-int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask)
+int rosecmpm(const rose_address *addr1, const rose_address *addr2,
+ unsigned short mask)
{
unsigned int i, j;
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 051804fbee17..f1a76a5820f1 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -66,10 +66,10 @@ static int rose_set_mac_address(struct net_device *dev, void *addr)
if (err)
return err;
- rose_del_loopback_node((rose_address *)dev->dev_addr);
+ rose_del_loopback_node((const rose_address *)dev->dev_addr);
}
- memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
+ dev_addr_set(dev, sa->sa_data);
return 0;
}
@@ -78,7 +78,7 @@ static int rose_open(struct net_device *dev)
{
int err;
- err = rose_add_loopback_node((rose_address *)dev->dev_addr);
+ err = rose_add_loopback_node((const rose_address *)dev->dev_addr);
if (err)
return err;
@@ -90,7 +90,7 @@ static int rose_open(struct net_device *dev)
static int rose_close(struct net_device *dev)
{
netif_stop_queue(dev);
- rose_del_loopback_node((rose_address *)dev->dev_addr);
+ rose_del_loopback_node((const rose_address *)dev->dev_addr);
return 0;
}
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index f6102e6f5161..8b96a56d3a49 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -94,11 +94,11 @@ static void rose_t0timer_expiry(struct timer_list *t)
*/
static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
{
- ax25_address *rose_call;
+ const ax25_address *rose_call;
ax25_cb *ax25s;
if (ax25cmp(&rose_callsign, &null_ax25_address) == 0)
- rose_call = (ax25_address *)neigh->dev->dev_addr;
+ rose_call = (const ax25_address *)neigh->dev->dev_addr;
else
rose_call = &rose_callsign;
@@ -117,11 +117,11 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
*/
static int rose_link_up(struct rose_neigh *neigh)
{
- ax25_address *rose_call;
+ const ax25_address *rose_call;
ax25_cb *ax25s;
if (ax25cmp(&rose_callsign, &null_ax25_address) == 0)
- rose_call = (ax25_address *)neigh->dev->dev_addr;
+ rose_call = (const ax25_address *)neigh->dev->dev_addr;
else
rose_call = &rose_callsign;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index c0e04c261a15..e2e6b6b78578 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -401,7 +401,7 @@ void rose_add_loopback_neigh(void)
/*
* Add a loopback node.
*/
-int rose_add_loopback_node(rose_address *address)
+int rose_add_loopback_node(const rose_address *address)
{
struct rose_node *rose_node;
int err = 0;
@@ -446,7 +446,7 @@ out:
/*
* Delete a loopback node.
*/
-void rose_del_loopback_node(rose_address *address)
+void rose_del_loopback_node(const rose_address *address)
{
struct rose_node *rose_node;
@@ -629,7 +629,8 @@ struct net_device *rose_dev_get(rose_address *addr)
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
- if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) {
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE &&
+ rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) {
dev_hold(dev);
goto out;
}
@@ -646,7 +647,8 @@ static int rose_dev_exists(rose_address *addr)
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
- if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0)
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE &&
+ rosecmp(addr, (const rose_address *)dev->dev_addr) == 0)
goto out;
}
dev = NULL;
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
index 4e565eeab426..be61d6f5be8d 100644
--- a/net/rxrpc/rtt.c
+++ b/net/rxrpc/rtt.c
@@ -22,7 +22,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
{
- return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
+ return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
}
static u32 rxrpc_bound_rto(u32 rto)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 7dd3a2dc5fa4..3258da3d5bed 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -480,16 +480,18 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
atomic_set(&p->tcfa_bindcnt, 1);
if (cpustats) {
- p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
if (!p->cpu_bstats)
goto err1;
- p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
if (!p->cpu_bstats_hw)
goto err2;
p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
if (!p->cpu_qstats)
goto err3;
}
+ gnet_stats_basic_sync_init(&p->tcfa_bstats);
+ gnet_stats_basic_sync_init(&p->tcfa_bstats_hw);
spin_lock_init(&p->tcfa_lock);
p->tcfa_index = index;
p->tcfa_tm.install = jiffies;
@@ -499,7 +501,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
&p->tcfa_rate_est,
- &p->tcfa_lock, NULL, est);
+ &p->tcfa_lock, false, est);
if (err)
goto err4;
}
@@ -1126,13 +1128,13 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
u64 drops, bool hw)
{
if (a->cpu_bstats) {
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+ _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
this_cpu_ptr(a->cpu_qstats)->drops += drops;
if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ _bstats_update(this_cpu_ptr(a->cpu_bstats_hw),
+ bytes, packets);
return;
}
@@ -1171,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
- gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
- &p->tcfa_bstats_hw) < 0 ||
+ if (gnet_stats_copy_basic(&d, p->cpu_bstats,
+ &p->tcfa_bstats, false) < 0 ||
+ gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
+ &p->tcfa_bstats_hw, false) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfa_qstats,
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5c36013339e1..f2bf896331a5 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
int action, filter_res;
tcf_lastuse_update(&prog->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
filter = rcu_dereference(prog->filter);
if (at_ingress) {
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index ad9df0cb4b98..90866ae45573 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -960,6 +960,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
tmpl = p->tmpl;
tcf_lastuse_update(&c->tcf_tm);
+ tcf_action_update_bstats(&c->common, skb);
if (clear) {
qdisc_skb_cb(skb)->post_ct = false;
@@ -1049,7 +1050,6 @@ out_push:
qdisc_skb_cb(skb)->post_ct = true;
out_clear:
- tcf_action_update_bstats(&c->common, skb);
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
return retval;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 7064a365a1a9..b757f90a2d58 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
u8 *tlv_data;
u16 metalen;
- bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
if (skb_at_tc_ingress(skb))
@@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
exceed_mtu = true;
}
- bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
if (!metalen) { /* no metadata to send */
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index e4529b428cf4..8faa4c58305e 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
int ret, mac_len;
tcf_lastuse_update(&m->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb);
/* Ensure 'data' points at mac_header prior calling mpls manipulating
* functions.
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 832157a840fc..9e77ba8401e5 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
police->common.cpu_bstats,
&police->tcf_rate_est,
&police->tcf_lock,
- NULL, est);
+ false, est);
if (err)
goto failure;
} else if (tb[TCA_POLICE_AVRATE] &&
@@ -248,7 +248,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
int ret;
tcf_lastuse_update(&police->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb);
ret = READ_ONCE(police->tcf_action);
p = rcu_dereference_bh(police->params);
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 230501eb9e06..ce859b0e0deb 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
int retval;
tcf_lastuse_update(&s->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb);
retval = READ_ONCE(s->tcf_action);
psample_group = rcu_dereference_bh(s->psample_group);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index cbbe1861d3a2..e617ab4505ca 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a,
* then it would look like "hello_3" (without quotes)
*/
pr_info("simple: %s_%llu\n",
- (char *)d->tcfd_defdata, d->tcf_bstats.packets);
+ (char *)d->tcfd_defdata,
+ u64_stats_read(&d->tcf_bstats.packets));
spin_unlock(&d->tcf_lock);
return d->tcf_action;
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 605418538347..d30ecbfc8f84 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
int action;
tcf_lastuse_update(&d->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
params = rcu_dereference_bh(d->params);
action = READ_ONCE(d->tcf_action);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index ecb9ee666095..9b6b52c5e24e 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
u64 flags;
tcf_lastuse_update(&d->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
action = READ_ONCE(d->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 23b21253b3c3..aab13ba11767 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -329,7 +329,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
ARRAY_SIZE(fl_ct_info_to_flower_map),
post_ct);
skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
- skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
+ skb_flow_dissect(skb, &mask->dissector, &skb_key,
+ FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
f = fl_mask_lookup(mask, &skb_key);
if (f && !tc_skip_sw(f->flags)) {
@@ -2188,18 +2189,24 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
arg->count = arg->skip;
+ rcu_read_lock();
idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) {
/* don't return filters that are being deleted */
if (!refcount_inc_not_zero(&f->refcnt))
continue;
+ rcu_read_unlock();
+
if (arg->fn(tp, f, arg) < 0) {
__fl_put(f);
arg->stop = 1;
+ rcu_read_lock();
break;
}
__fl_put(f);
arg->count++;
+ rcu_read_lock();
}
+ rcu_read_unlock();
arg->cookie = id;
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 46254968d390..0a04468b7314 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -457,7 +457,7 @@ META_COLLECTOR(int_sk_fwd_alloc)
*err = -1;
return;
}
- dst->value = sk->sk_forward_alloc;
+ dst->value = sk_forward_alloc_get(sk);
}
META_COLLECTOR(int_sk_sndbuf)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 5e90e9b160e3..efcd0b5e9a32 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -507,20 +507,27 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
list_for_each_entry(stab, &qdisc_stab_list, list) {
if (memcmp(&stab->szopts, s, sizeof(*s)))
continue;
- if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
+ if (tsize > 0 &&
+ memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
continue;
stab->refcnt++;
return stab;
}
- stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
+ if (s->size_log > STAB_SIZE_LOG_MAX ||
+ s->cell_log > STAB_SIZE_LOG_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
+ return ERR_PTR(-EINVAL);
+ }
+
+ stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
if (!stab)
return ERR_PTR(-ENOMEM);
stab->refcnt = 1;
stab->szopts = *s;
if (tsize > 0)
- memcpy(stab->data, tab, tsize * sizeof(u16));
+ memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
list_add_tail(&stab->list, &qdisc_stab_list);
@@ -878,7 +885,7 @@ static void qdisc_offload_graft_root(struct net_device *dev,
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
u32 portid, u32 seq, u16 flags, int event)
{
- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
struct gnet_stats_queue __percpu *cpu_qstats = NULL;
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -936,8 +943,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
cpu_qstats = q->cpu_qstats;
}
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
- &d, cpu_bstats, &q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
goto nla_put_failure;
@@ -1258,26 +1264,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
rcu_assign_pointer(sch->stab, stab);
}
if (tca[TCA_RATE]) {
- seqcount_t *running;
-
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT) {
NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
goto err_out4;
}
- if (sch->parent != TC_H_ROOT &&
- !(sch->flags & TCQ_F_INGRESS) &&
- (!p || !(p->flags & TCQ_F_MQROOT)))
- running = qdisc_root_sleeping_running(sch);
- else
- running = &sch->running;
-
err = gen_new_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
NULL,
- running,
+ true,
tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
@@ -1353,7 +1350,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
sch->cpu_bstats,
&sch->rate_est,
NULL,
- qdisc_root_sleeping_running(sch),
+ true,
tca[TCA_RATE]);
}
out:
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 7d8518176b45..4c8e994cf0a5 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -52,7 +52,7 @@ struct atm_flow_data {
struct atm_qdisc_data *parent; /* parent qdisc */
struct socket *sock; /* for closing */
int ref; /* reference count */
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct list_head list;
struct atm_flow_data *excess; /* flow for excess traffic;
@@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
INIT_LIST_HEAD(&p->flows);
INIT_LIST_HEAD(&p->link.list);
+ gnet_stats_basic_sync_init(&p->link.bstats);
list_add(&p->link.list, &p->flows);
p->link.q = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops, sch->handle, extack);
@@ -652,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &flow->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index e0da15530f0e..02d9f0dfe356 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -116,7 +116,7 @@ struct cbq_class {
long avgidle;
long deficit; /* Saved deficit for WRR */
psched_time_t penalized;
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
struct tc_cbq_xstats xstats;
@@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q)
long avgidle = cl->avgidle;
long idle;
- cl->bstats.packets++;
- cl->bstats.bytes += len;
+ _bstats_update(&cl->bstats, len, 1);
/*
* (now - last) is total time between packet right edges.
@@ -1384,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (cl->undertime != PSCHED_PASTPERFECT)
cl->xstats.undertime = cl->undertime - q->now;
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
@@ -1519,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
NULL,
- qdisc_root_sleeping_running(sch),
+ true,
tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
@@ -1611,6 +1609,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl == NULL)
goto failure;
+ gnet_stats_basic_sync_init(&cl->bstats);
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
@@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL,
- qdisc_root_sleeping_running(sch),
- tca[TCA_RATE]);
+ NULL, true, tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
tcf_block_put(cl->block);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 642cd179b7a7..18e4f7a0b291 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -19,7 +19,7 @@ struct drr_class {
struct Qdisc_class_common common;
unsigned int filter_cnt;
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
struct list_head alist;
@@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- NULL,
- qdisc_root_sleeping_running(sch),
+ NULL, true,
tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to replace estimator");
@@ -106,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ gnet_stats_basic_sync_init(&cl->bstats);
cl->common.classid = classid;
cl->quantum = quantum;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -118,9 +118,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL,
- qdisc_root_sleeping_running(sch),
- tca[TCA_RATE]);
+ NULL, true, tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to replace estimator");
qdisc_put(cl->qdisc);
@@ -267,8 +265,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (qlen)
xstats.deficit = cl->deficit;
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
return -1;
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 1f857ffd1ac2..0eae9ff5edf6 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -41,7 +41,7 @@ struct ets_class {
struct Qdisc *qdisc;
u32 quantum;
u32 deficit;
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
};
@@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
struct ets_class *cl = ets_class_from_arg(sch, arg);
struct Qdisc *cl_q = cl->qdisc;
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
qdisc_qstats_copy(d, cl_q) < 0)
return -1;
@@ -661,7 +660,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
q->nbands = nbands;
for (i = nstrict; i < q->nstrict; i++) {
- INIT_LIST_HEAD(&q->classes[i].alist);
if (q->classes[i].qdisc->q.qlen) {
list_add_tail(&q->classes[i].alist, &q->active);
q->classes[i].deficit = quanta[i];
@@ -687,7 +685,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
ets_offload_change(sch);
for (i = q->nbands; i < oldbands; i++) {
qdisc_put(q->classes[i].qdisc);
- memset(&q->classes[i], 0, sizeof(q->classes[i]));
+ q->classes[i].qdisc = NULL;
+ q->classes[i].quantum = 0;
+ q->classes[i].deficit = 0;
+ gnet_stats_basic_sync_init(&q->classes[i].bstats);
+ memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
}
return 0;
}
@@ -696,7 +698,7 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct ets_sched *q = qdisc_priv(sch);
- int err;
+ int err, i;
if (!opt)
return -EINVAL;
@@ -706,6 +708,9 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
return err;
INIT_LIST_HEAD(&q->active);
+ for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
+ INIT_LIST_HEAD(&q->classes[i].alist);
+
return ets_qdisc_change(sch, opt, extack);
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index a579a4131d22..e1040421b797 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -233,6 +233,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
return 0;
+ if (!q->ops->change)
+ return 0;
+
nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (nla) {
nla->nla_type = RTM_NEWQDISC;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index bb0cd6d3d2c2..839e1235db05 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -362,6 +362,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 },
[TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
+ [TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR] = { .type = NLA_U8 },
+ [TCA_FQ_CODEL_CE_THRESHOLD_MASK] = { .type = NLA_U8 },
};
static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
@@ -408,6 +410,11 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
}
+ if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR])
+ q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]);
+ if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK])
+ q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]);
+
if (tb[TCA_FQ_CODEL_INTERVAL]) {
u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
@@ -544,10 +551,15 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
q->flows_cnt))
goto nla_put_failure;
- if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
- nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
- codel_time_to_us(q->cparams.ce_threshold)))
- goto nla_put_failure;
+ if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) {
+ if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+ codel_time_to_us(q->cparams.ce_threshold)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask))
+ goto nla_put_failure;
+ }
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a8dd06c74e31..3b0f62095803 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -304,8 +304,8 @@ trace:
/*
* Transmit possibly several skbs, and handle the return status as
- * required. Owning running seqcount bit guarantees that
- * only one CPU can execute this function.
+ * required. Owning qdisc running bit guarantees that only one CPU
+ * can execute this function.
*
* Returns to the caller:
* false - hardware queue frozen backoff
@@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = {
.ops = &noop_qdisc_ops,
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
- .running = SEQCNT_ZERO(noop_qdisc.running),
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
.gso_skb = {
.next = (struct sk_buff *)&noop_qdisc.gso_skb,
@@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
EXPORT_SYMBOL(pfifo_fast_ops);
static struct lock_class_key qdisc_tx_busylock;
-static struct lock_class_key qdisc_running_key;
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops,
@@ -892,11 +890,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
qdisc_skb_head_init(&sch->q);
+ gnet_stats_basic_sync_init(&sch->bstats);
spin_lock_init(&sch->q.lock);
if (ops->static_flags & TCQ_F_CPUSTATS) {
sch->cpu_bstats =
- netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
if (!sch->cpu_bstats)
goto errout1;
@@ -916,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
lockdep_set_class(&sch->seqlock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
- seqcount_init(&sch->running);
- lockdep_set_class(&sch->running,
- dev->qdisc_running_key ?: &qdisc_running_key);
-
sch->ops = ops;
sch->flags = ops->static_flags;
sch->enqueue = ops->enqueue;
@@ -1330,6 +1325,39 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
return 0;
}
+void dev_qdisc_change_real_num_tx(struct net_device *dev,
+ unsigned int new_real_tx)
+{
+ struct Qdisc *qdisc = dev->qdisc;
+
+ if (qdisc->ops->change_real_num_tx)
+ qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
+}
+
+void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
+{
+#ifdef CONFIG_NET_SCHED
+ struct net_device *dev = qdisc_dev(sch);
+ struct Qdisc *qdisc;
+ unsigned int i;
+
+ for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ /* Only update the default qdiscs we created,
+ * qdiscs with handles are always hashed.
+ */
+ if (qdisc != &noop_qdisc && !qdisc->handle)
+ qdisc_hash_del(qdisc);
+ }
+ for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ if (qdisc != &noop_qdisc && !qdisc->handle)
+ qdisc_hash_add(qdisc, false);
+ }
+#endif
+}
+EXPORT_SYMBOL(mq_change_real_num_tx);
+
int dev_qdisc_change_tx_queue_len(struct net_device *dev)
{
bool up = dev->flags & IFF_UP;
@@ -1459,10 +1487,6 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
}
EXPORT_SYMBOL(psched_ppscfg_precompute);
-static void mini_qdisc_rcu_func(struct rcu_head *head)
-{
-}
-
void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head)
{
@@ -1475,28 +1499,30 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
if (!tp_head) {
RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
- /* Wait for flying RCU callback before it is freed. */
- rcu_barrier();
- return;
- }
+ } else {
+ miniq = miniq_old != &miniqp->miniq1 ?
+ &miniqp->miniq1 : &miniqp->miniq2;
- miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
- &miniqp->miniq1 : &miniqp->miniq2;
+ /* We need to make sure that readers won't see the miniq
+ * we are about to modify. So ensure that at least one RCU
+ * grace period has elapsed since the miniq was made
+ * inactive.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ cond_synchronize_rcu(miniq->rcu_state);
+ else if (!poll_state_synchronize_rcu(miniq->rcu_state))
+ synchronize_rcu_expedited();
- /* We need to make sure that readers won't see the miniq
- * we are about to modify. So wait until previous call_rcu callback
- * is done.
- */
- rcu_barrier();
- miniq->filter_list = tp_head;
- rcu_assign_pointer(*miniqp->p_miniq, miniq);
+ miniq->filter_list = tp_head;
+ rcu_assign_pointer(*miniqp->p_miniq, miniq);
+ }
if (miniq_old)
- /* This is counterpart of the rcu barriers above. We need to
+ /* This is counterpart of the rcu sync above. We need to
* block potential new user of miniq_old until all readers
* are not seeing it.
*/
- call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
+ miniq_old->rcu_state = start_poll_synchronize_rcu();
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);
@@ -1515,6 +1541,8 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->miniq1.rcu_state = get_state_synchronize_rcu();
+ miniqp->miniq2.rcu_state = miniqp->miniq1.rcu_state;
miniqp->p_miniq = p_miniq;
}
EXPORT_SYMBOL(mini_qdisc_pair_init);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 621dc6afde8f..1073c76d05c4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -56,6 +56,7 @@ struct gred_sched {
u32 DPs;
u32 def;
struct red_vars wred_set;
+ struct tc_gred_qopt_offload *opt;
};
static inline int gred_wred_mode(struct gred_sched *table)
@@ -311,48 +312,50 @@ static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
{
struct gred_sched *table = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- struct tc_gred_qopt_offload opt = {
- .command = command,
- .handle = sch->handle,
- .parent = sch->parent,
- };
+ struct tc_gred_qopt_offload *opt = table->opt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
+ memset(opt, 0, sizeof(*opt));
+ opt->command = command;
+ opt->handle = sch->handle;
+ opt->parent = sch->parent;
+
if (command == TC_GRED_REPLACE) {
unsigned int i;
- opt.set.grio_on = gred_rio_mode(table);
- opt.set.wred_on = gred_wred_mode(table);
- opt.set.dp_cnt = table->DPs;
- opt.set.dp_def = table->def;
+ opt->set.grio_on = gred_rio_mode(table);
+ opt->set.wred_on = gred_wred_mode(table);
+ opt->set.dp_cnt = table->DPs;
+ opt->set.dp_def = table->def;
for (i = 0; i < table->DPs; i++) {
struct gred_sched_data *q = table->tab[i];
if (!q)
continue;
- opt.set.tab[i].present = true;
- opt.set.tab[i].limit = q->limit;
- opt.set.tab[i].prio = q->prio;
- opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
- opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
- opt.set.tab[i].is_ecn = gred_use_ecn(q);
- opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
- opt.set.tab[i].probability = q->parms.max_P;
- opt.set.tab[i].backlog = &q->backlog;
+ opt->set.tab[i].present = true;
+ opt->set.tab[i].limit = q->limit;
+ opt->set.tab[i].prio = q->prio;
+ opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+ opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+ opt->set.tab[i].is_ecn = gred_use_ecn(q);
+ opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
+ opt->set.tab[i].probability = q->parms.max_P;
+ opt->set.tab[i].backlog = &q->backlog;
}
- opt.set.qstats = &sch->qstats;
+ opt->set.qstats = &sch->qstats;
}
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
}
static int gred_offload_dump_stats(struct Qdisc *sch)
{
struct gred_sched *table = qdisc_priv(sch);
struct tc_gred_qopt_offload *hw_stats;
+ u64 bytes = 0, packets = 0;
unsigned int i;
int ret;
@@ -364,9 +367,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
hw_stats->handle = sch->handle;
hw_stats->parent = sch->parent;
- for (i = 0; i < MAX_DPs; i++)
+ for (i = 0; i < MAX_DPs; i++) {
+ gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
if (table->tab[i])
hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+ }
ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
/* Even if driver returns failure adjust the stats - in case offload
@@ -375,19 +380,19 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
for (i = 0; i < MAX_DPs; i++) {
if (!table->tab[i])
continue;
- table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
- table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
+ table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
+ table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
- _bstats_update(&sch->bstats,
- hw_stats->stats.bstats[i].bytes,
- hw_stats->stats.bstats[i].packets);
+ bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
+ packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
sch->qstats.drops += hw_stats->stats.qstats[i].drops;
sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
}
+ _bstats_update(&sch->bstats, bytes, packets);
kfree(hw_stats);
return ret;
@@ -728,6 +733,7 @@ err_unlock_free:
static int gred_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ struct gred_sched *table = qdisc_priv(sch);
struct nlattr *tb[TCA_GRED_MAX + 1];
int err;
@@ -751,6 +757,12 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
sch->limit = qdisc_dev(sch)->tx_queue_len
* psched_mtu(qdisc_dev(sch));
+ if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
+ table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
+ if (!table->opt)
+ return -ENOMEM;
+ }
+
return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
}
@@ -907,6 +919,7 @@ static void gred_destroy(struct Qdisc *sch)
gred_destroy_vq(table->tab[i]);
}
gred_offload(sch, TC_GRED_DESTROY);
+ kfree(table->opt);
}
static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b7ac30cca035..d3979a6000e7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -111,7 +111,7 @@ enum hfsc_class_flags {
struct hfsc_class {
struct Qdisc_class_common cl_common;
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
struct tcf_proto __rcu *filter_list; /* filter list */
@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
NULL,
- qdisc_root_sleeping_running(sch),
+ true,
tca[TCA_RATE]);
if (err)
return err;
@@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL,
- qdisc_root_sleeping_running(sch),
- tca[TCA_RATE]);
+ NULL, true, tca[TCA_RATE]);
if (err) {
tcf_block_put(cl->block);
kfree(cl);
@@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.work = cl->cl_total;
xstats.rtwork = cl->cl_cumul;
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
@@ -1406,6 +1404,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
if (err)
return err;
+ gnet_stats_basic_sync_init(&q->root.bstats);
q->root.cl_common.classid = sch->handle;
q->root.sched = q;
q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5067a6e5d4fd..9267922ea9c3 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -113,8 +113,8 @@ struct htb_class {
/*
* Written often fields
*/
- struct gnet_stats_basic_packed bstats;
- struct gnet_stats_basic_packed bstats_bias;
+ struct gnet_stats_basic_sync bstats;
+ struct gnet_stats_basic_sync bstats_bias;
struct tc_htb_xstats xstats; /* our special stats */
/* token bucket parameters */
@@ -1084,11 +1084,15 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
if (offload) {
- if (sch->parent != TC_H_ROOT)
+ if (sch->parent != TC_H_ROOT) {
+ NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
return -EOPNOTSUPP;
+ }
- if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
+ NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
return -EOPNOTSUPP;
+ }
q->num_direct_qdiscs = dev->real_num_tx_queues;
q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
@@ -1308,10 +1312,11 @@ nla_put_failure:
static void htb_offload_aggregate_stats(struct htb_sched *q,
struct htb_class *cl)
{
+ u64 bytes = 0, packets = 0;
struct htb_class *c;
unsigned int i;
- memset(&cl->bstats, 0, sizeof(cl->bstats));
+ gnet_stats_basic_sync_init(&cl->bstats);
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
@@ -1323,14 +1328,15 @@ static void htb_offload_aggregate_stats(struct htb_sched *q,
if (p != cl)
continue;
- cl->bstats.bytes += c->bstats_bias.bytes;
- cl->bstats.packets += c->bstats_bias.packets;
+ bytes += u64_stats_read(&c->bstats_bias.bytes);
+ packets += u64_stats_read(&c->bstats_bias.packets);
if (c->level == 0) {
- cl->bstats.bytes += c->leaf.q->bstats.bytes;
- cl->bstats.packets += c->leaf.q->bstats.packets;
+ bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
+ packets += u64_stats_read(&c->leaf.q->bstats.packets);
}
}
}
+ _bstats_update(&cl->bstats, bytes, packets);
}
static int
@@ -1357,16 +1363,16 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
if (cl->leaf.q)
cl->bstats = cl->leaf.q->bstats;
else
- memset(&cl->bstats, 0, sizeof(cl->bstats));
- cl->bstats.bytes += cl->bstats_bias.bytes;
- cl->bstats.packets += cl->bstats_bias.packets;
+ gnet_stats_basic_sync_init(&cl->bstats);
+ _bstats_update(&cl->bstats,
+ u64_stats_read(&cl->bstats_bias.bytes),
+ u64_stats_read(&cl->bstats_bias.packets));
} else {
htb_offload_aggregate_stats(q, cl);
}
}
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
return -1;
@@ -1578,8 +1584,9 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
WARN_ON(old != q);
if (cl->parent) {
- cl->parent->bstats_bias.bytes += q->bstats.bytes;
- cl->parent->bstats_bias.packets += q->bstats.packets;
+ _bstats_update(&cl->parent->bstats_bias,
+ u64_stats_read(&q->bstats.bytes),
+ u64_stats_read(&q->bstats.packets));
}
offload_opt = (struct tc_htb_qopt_offload) {
@@ -1849,6 +1856,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl)
goto failure;
+ gnet_stats_basic_sync_init(&cl->bstats);
+ gnet_stats_basic_sync_init(&cl->bstats_bias);
+
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
@@ -1858,7 +1868,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
NULL,
- qdisc_root_sleeping_running(sch),
+ true,
tca[TCA_RATE] ? : &est.nla);
if (err)
goto err_block_put;
@@ -1922,8 +1932,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
htb_graft_helper(dev_queue, old_q);
goto err_kill_estimator;
}
- parent->bstats_bias.bytes += old_q->bstats.bytes;
- parent->bstats_bias.packets += old_q->bstats.packets;
+ _bstats_update(&parent->bstats_bias,
+ u64_stats_read(&old_q->bstats.bytes),
+ u64_stats_read(&old_q->bstats.packets));
qdisc_put(old_q);
}
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
@@ -1983,7 +1994,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
NULL,
- qdisc_root_sleeping_running(sch),
+ true,
tca[TCA_RATE]);
if (err)
return err;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index e79f1afe0cfd..83d2e54bf303 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -130,10 +130,9 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
struct net_device *dev = qdisc_dev(sch);
struct Qdisc *qdisc;
unsigned int ntx;
- __u32 qlen = 0;
sch->q.qlen = 0;
- memset(&sch->bstats, 0, sizeof(sch->bstats));
+ gnet_stats_basic_sync_init(&sch->bstats);
memset(&sch->qstats, 0, sizeof(sch->qstats));
/* MQ supports lockless qdiscs. However, statistics accounting needs
@@ -145,25 +144,11 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
spin_lock_bh(qdisc_lock(qdisc));
- if (qdisc_is_percpu_stats(qdisc)) {
- qlen = qdisc_qlen_sum(qdisc);
- __gnet_stats_copy_basic(NULL, &sch->bstats,
- qdisc->cpu_bstats,
- &qdisc->bstats);
- __gnet_stats_copy_queue(&sch->qstats,
- qdisc->cpu_qstats,
- &qdisc->qstats, qlen);
- sch->q.qlen += qlen;
- } else {
- sch->q.qlen += qdisc->q.qlen;
- sch->bstats.bytes += qdisc->bstats.bytes;
- sch->bstats.packets += qdisc->bstats.packets;
- sch->qstats.qlen += qdisc->qstats.qlen;
- sch->qstats.backlog += qdisc->qstats.backlog;
- sch->qstats.drops += qdisc->qstats.drops;
- sch->qstats.requeues += qdisc->qstats.requeues;
- sch->qstats.overlimits += qdisc->qstats.overlimits;
- }
+ gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+ &qdisc->bstats, false);
+ gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+ &qdisc->qstats);
+ sch->q.qlen += qdisc_qlen(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
}
@@ -246,8 +231,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
- &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
return 0;
@@ -288,6 +272,7 @@ struct Qdisc_ops mq_qdisc_ops __read_mostly = {
.init = mq_init,
.destroy = mq_destroy,
.attach = mq_attach,
+ .change_real_num_tx = mq_change_real_num_tx,
.dump = mq_dump,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 8766ab5b8788..b29f3453c6ea 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
unsigned int ntx, tc;
sch->q.qlen = 0;
- memset(&sch->bstats, 0, sizeof(sch->bstats));
+ gnet_stats_basic_sync_init(&sch->bstats);
memset(&sch->qstats, 0, sizeof(sch->qstats));
/* MQ supports lockless qdiscs. However, statistics accounting needs
@@ -402,25 +402,11 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
spin_lock_bh(qdisc_lock(qdisc));
- if (qdisc_is_percpu_stats(qdisc)) {
- __u32 qlen = qdisc_qlen_sum(qdisc);
-
- __gnet_stats_copy_basic(NULL, &sch->bstats,
- qdisc->cpu_bstats,
- &qdisc->bstats);
- __gnet_stats_copy_queue(&sch->qstats,
- qdisc->cpu_qstats,
- &qdisc->qstats, qlen);
- sch->q.qlen += qlen;
- } else {
- sch->q.qlen += qdisc->q.qlen;
- sch->bstats.bytes += qdisc->bstats.bytes;
- sch->bstats.packets += qdisc->bstats.packets;
- sch->qstats.backlog += qdisc->qstats.backlog;
- sch->qstats.drops += qdisc->qstats.drops;
- sch->qstats.requeues += qdisc->qstats.requeues;
- sch->qstats.overlimits += qdisc->qstats.overlimits;
- }
+ gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+ &qdisc->bstats, false);
+ gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+ &qdisc->qstats);
+ sch->q.qlen += qdisc_qlen(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
}
@@ -512,12 +498,13 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
{
if (cl >= TC_H_MIN_PRIORITY) {
int i;
- __u32 qlen = 0;
+ __u32 qlen;
struct gnet_stats_queue qstats = {0};
- struct gnet_stats_basic_packed bstats = {0};
+ struct gnet_stats_basic_sync bstats;
struct net_device *dev = qdisc_dev(sch);
struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
+ gnet_stats_basic_sync_init(&bstats);
/* Drop lock here it will be reclaimed before touching
* statistics this is required because the d->lock we
* hold here is the look on dev_queue->qdisc_sleeping
@@ -529,37 +516,31 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
- struct gnet_stats_queue __percpu *cpu_qstats = NULL;
spin_lock_bh(qdisc_lock(qdisc));
- if (qdisc_is_percpu_stats(qdisc)) {
- cpu_bstats = qdisc->cpu_bstats;
- cpu_qstats = qdisc->cpu_qstats;
- }
- qlen = qdisc_qlen_sum(qdisc);
- __gnet_stats_copy_basic(NULL, &sch->bstats,
- cpu_bstats, &qdisc->bstats);
- __gnet_stats_copy_queue(&sch->qstats,
- cpu_qstats,
- &qdisc->qstats,
- qlen);
+ gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+ &qdisc->bstats, false);
+ gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
+ &qdisc->qstats);
+ sch->q.qlen += qdisc_qlen(qdisc);
+
spin_unlock_bh(qdisc_lock(qdisc));
}
+ qlen = qdisc_qlen(sch) + qstats.qlen;
/* Reclaim root sleeping lock before completing stats */
if (d->lock)
spin_lock_bh(d->lock);
- if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
return -1;
} else {
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
- sch->cpu_bstats, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, sch->cpu_bstats,
+ &sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
}
@@ -623,6 +604,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
.init = mqprio_init,
.destroy = mqprio_destroy,
.attach = mqprio_attach,
+ .change_real_num_tx = mq_change_real_num_tx,
.dump = mqprio_dump,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index e282e7382117..cd8ab90c4765 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
qdisc_qstats_copy(d, cl_q) < 0)
return -1;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 0c345e43a09a..ecbb10db1111 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -785,7 +785,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
if (!n || n > NETEM_DIST_MAX)
return -EINVAL;
- d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
+ d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
if (!d)
return -ENOMEM;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 03fdf31ccb6a..3b8d7197c06b 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
+ &cl_q->bstats, true) < 0 ||
qdisc_qstats_copy(d, cl_q) < 0)
return -1;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 58a9d42b52b8..0b7f9ba28deb 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -131,7 +131,7 @@ struct qfq_class {
unsigned int filter_cnt;
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
struct Qdisc *qdisc;
@@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
NULL,
- qdisc_root_sleeping_running(sch),
+ true,
tca[TCA_RATE]);
if (err)
return err;
@@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ gnet_stats_basic_sync_init(&cl->bstats);
cl->common.classid = classid;
cl->deficit = lmax;
@@ -477,7 +478,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
NULL,
- qdisc_root_sleeping_running(sch),
+ true,
tca[TCA_RATE]);
if (err)
goto destroy_class;
@@ -639,8 +640,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
qdisc_qstats_copy(d, cl->qdisc) < 0)
return -1;
@@ -1234,8 +1234,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- cl->bstats.bytes += len;
- cl->bstats.packets += gso_segs;
+ _bstats_update(&cl->bstats, len, gso_segs);
sch->qstats.backlog += len;
++sch->q.qlen;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 1ab2fc933a21..377f896bdedc 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -95,18 +95,22 @@ static ktime_t sched_base_time(const struct sched_gate_list *sched)
return ns_to_ktime(sched->base_time);
}
-static ktime_t taprio_get_time(struct taprio_sched *q)
+static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
{
- ktime_t mono = ktime_get();
+ /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
+ enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
- switch (q->tk_offset) {
+ switch (tk_offset) {
case TK_OFFS_MAX:
return mono;
default:
- return ktime_mono_to_any(mono, q->tk_offset);
+ return ktime_mono_to_any(mono, tk_offset);
}
+}
- return KTIME_MAX;
+static ktime_t taprio_get_time(const struct taprio_sched *q)
+{
+ return taprio_mono_to_any(q, ktime_get());
}
static void taprio_free_sched_cb(struct rcu_head *head)
@@ -319,7 +323,7 @@ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
return 0;
}
- return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset);
+ return taprio_mono_to_any(q, skb->skb_mstamp_ns);
}
/* There are a few scenarios where we will have to modify the txtime from
@@ -1352,6 +1356,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
}
} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+ enum tk_offsets tk_offset;
/* We only support static clockids and we don't allow
* for it to be modified after the first init.
@@ -1366,22 +1371,24 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
switch (clockid) {
case CLOCK_REALTIME:
- q->tk_offset = TK_OFFS_REAL;
+ tk_offset = TK_OFFS_REAL;
break;
case CLOCK_MONOTONIC:
- q->tk_offset = TK_OFFS_MAX;
+ tk_offset = TK_OFFS_MAX;
break;
case CLOCK_BOOTTIME:
- q->tk_offset = TK_OFFS_BOOT;
+ tk_offset = TK_OFFS_BOOT;
break;
case CLOCK_TAI:
- q->tk_offset = TK_OFFS_TAI;
+ tk_offset = TK_OFFS_TAI;
break;
default:
NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
err = -EINVAL;
goto out;
}
+ /* This pairs with READ_ONCE() in taprio_mono_to_any */
+ WRITE_ONCE(q->tk_offset, tk_offset);
q->clockid = clockid;
} else {
@@ -1641,6 +1648,10 @@ static void taprio_destroy(struct Qdisc *sch)
list_del(&q->taprio_list);
spin_unlock(&taprio_list_lock);
+ /* Note that taprio_reset() might not be called if an error
+ * happens in qdisc_create(), after taprio_init() has been called.
+ */
+ hrtimer_cancel(&q->advance_timer);
taprio_disable_offload(dev, q, NULL);
@@ -1973,7 +1984,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 78e79029dc63..72102277449e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -184,6 +184,20 @@ static int tbf_offload_dump(struct Qdisc *sch)
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}
+static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new,
+ struct Qdisc *old, struct netlink_ext_ack *extack)
+{
+ struct tc_tbf_qopt_offload graft_offload = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ .child_handle = new->handle,
+ .command = TC_TBF_GRAFT,
+ };
+
+ qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
+ TC_SETUP_QDISC_TBF, &graft_offload, extack);
+}
+
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
@@ -547,6 +561,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->qdisc);
+
+ tbf_offload_graft(sch, new, *old, extack);
return 0;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5ef86fdb1176..1f1786021d9c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -702,7 +702,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
/* Break out if chunk length is less then minimal. */
- if (ntohs(ch->length) < sizeof(_ch))
+ if (!ch || ntohs(ch->length) < sizeof(_ch))
break;
ch_end = offset + SCTP_PAD4(ntohs(ch->length));
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 4dfb5ea82b05..cdfdbd353c67 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -581,13 +581,16 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
sk = chunk->skb->sk;
- /* check gso */
if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) {
- if (!sk_can_gso(sk)) {
- pr_err_once("Trying to GSO but underlying device doesn't support it.");
- goto out;
+ if (tp->pl.state == SCTP_PL_ERROR) { /* do IP fragmentation if in Error state */
+ packet->ipfragok = 1;
+ } else {
+ if (!sk_can_gso(sk)) { /* check gso */
+ pr_err_once("Trying to GSO but underlying device doesn't support it.");
+ goto out;
+ }
+ gso = 1;
}
- gso = 1;
}
/* alloc head skb */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index ec0f52567c16..35928fefae33 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -33,7 +33,6 @@
#include <linux/seq_file.h>
#include <linux/memblock.h>
#include <linux/highmem.h>
-#include <linux/swap.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b8fa8f1a7277..c7503fd64915 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3697,7 +3697,7 @@ struct sctp_chunk *sctp_make_strreset_req(
outlen = (sizeof(outreq) + stream_len) * out;
inlen = (sizeof(inreq) + stream_len) * in;
- retval = sctp_make_reconf(asoc, outlen + inlen);
+ retval = sctp_make_reconf(asoc, SCTP_PAD4(outlen) + SCTP_PAD4(inlen));
if (!retval)
return NULL;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 32df65f68c12..39ba82ee87ce 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -156,6 +156,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort(
void *arg,
struct sctp_cmd_seq *commands);
+static enum sctp_disposition
+__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands);
+
/* Small helper function that checks if the chunk length
* is of the appropriate length. The 'required_length' argument
* is set to be the size of a specific chunk we are testing.
@@ -320,11 +326,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
struct sctp_packet *packet;
int len;
- /* Update socket peer label if first association. */
- if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
- chunk->skb))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
/* 6.10 Bundling
* An endpoint MUST NOT bundle INIT, INIT ACK or
* SHUTDOWN COMPLETE with any other chunks.
@@ -337,6 +338,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
if (!chunk->singleton)
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ /* Make sure that the INIT chunk has a valid length.
+ * Normally, this would cause an ABORT with a Protocol Violation
+ * error, but since we don't have an association, we'll
+ * just discard the packet.
+ */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
@@ -351,14 +360,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
if (chunk->sctp_hdr->vtag != 0)
return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
- /* Make sure that the INIT chunk has a valid length.
- * Normally, this would cause an ABORT with a Protocol Violation
- * error, but since we don't have an association, we'll
- * just discard the packet.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
/* If the INIT is coming toward a closing socket, we'll send back
* and ABORT. Essentially, this catches the race of INIT being
* backloged to the socket at the same time as the user issues close().
@@ -409,6 +410,12 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
if (!new_asoc)
goto nomem;
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ }
+
if (sctp_assoc_set_bind_addr_from_ep(new_asoc,
sctp_scope(sctp_source(chunk)),
GFP_ATOMIC) < 0)
@@ -704,6 +711,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
struct sock *sk;
int error = 0;
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
@@ -718,7 +728,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
* in sctp_unpack_cookie().
*/
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
/* If the endpoint is not listening or if the number of associations
* on the TCP-style socket exceed the max backlog, respond with an
@@ -770,6 +781,10 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
}
}
+ if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ }
/* Delay state machine commands until later.
*
@@ -931,7 +946,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
/* Set peer label for connection. */
- security_inet_conn_established(ep->base.sk, chunk->skb);
+ security_sctp_assoc_established((struct sctp_association *)asoc, chunk->skb);
/* RFC 2960 5.1 Normal Establishment of an Association
*
@@ -1507,11 +1522,6 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
struct sctp_packet *packet;
int len;
- /* Update socket peer label if first association. */
- if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
- chunk->skb))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
/* 6.10 Bundling
* An endpoint MUST NOT bundle INIT, INIT ACK or
* SHUTDOWN COMPLETE with any other chunks.
@@ -1524,20 +1534,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
if (!chunk->singleton)
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ /* Make sure that the INIT chunk has a valid length. */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
* Tag.
*/
if (chunk->sctp_hdr->vtag != 0)
return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
- /* Make sure that the INIT chunk has a valid length.
- * In this case, we generate a protocol violation since we have
- * an association established.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
-
if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port)
return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands);
@@ -1588,6 +1594,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
if (!new_asoc)
goto nomem;
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ }
+
if (sctp_assoc_set_bind_addr_from_ep(new_asoc,
sctp_scope(sctp_source(chunk)), GFP_ATOMIC) < 0)
goto nomem;
@@ -1882,9 +1894,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
* its peer.
*/
if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) {
- disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc,
- SCTP_ST_CHUNK(chunk->chunk_hdr->type),
- chunk, commands);
+ disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc,
+ SCTP_ST_CHUNK(chunk->chunk_hdr->type),
+ chunk, commands);
if (SCTP_DISPOSITION_NOMEM == disposition)
goto nomem;
@@ -2202,9 +2214,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
* enough for the chunk header. Cookie length verification is
* done later.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) {
+ if (!sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands);
+ }
/* "Decode" the chunk. We have no optional parameters so we
* are in good shape.
@@ -2247,8 +2261,7 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
}
/* Update socket peer label if first association. */
- if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
- chunk->skb)) {
+ if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
sctp_association_free(new_asoc);
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
@@ -2341,7 +2354,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2387,7 +2400,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2657,7 +2670,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2970,13 +2983,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn(
* that belong to this association, it should discard the INIT chunk and
* retransmit the SHUTDOWN ACK chunk.
*/
-enum sctp_disposition sctp_sf_do_9_2_reshutack(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const union sctp_subtype type,
- void *arg,
- struct sctp_cmd_seq *commands)
+static enum sctp_disposition
+__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
@@ -3010,6 +3021,26 @@ nomem:
return SCTP_DISPOSITION_NOMEM;
}
+enum sctp_disposition
+sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_chunk *chunk = arg;
+
+ if (!chunk->singleton)
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
+ if (chunk->sctp_hdr->vtag != 0)
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
+
+ return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands);
+}
+
/*
* sctp_sf_do_ecn_cwr
*
@@ -3662,6 +3693,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net,
SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+
ch = (struct sctp_chunkhdr *)chunk->chunk_hdr;
do {
/* Report violation if the chunk is less then minimal */
@@ -3777,12 +3811,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5(
SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
- /* If the chunk length is invalid, we don't want to process
- * the reset of the packet.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
/* We need to discard the rest of the packet to prevent
* potential boomming attacks from additional bundled chunks.
* This is documented in SCTP Threats ID.
@@ -3810,6 +3838,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (!sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -3845,6 +3876,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
+ /* Make sure that the ASCONF ADDIP chunk has a valid length. */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
+
/* ADD-IP: Section 4.1.1
* This chunk MUST be sent in an authenticated way by using
* the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
@@ -3853,13 +3889,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
*/
if (!asoc->peer.asconf_capable ||
(!net->sctp.addip_noauth && !chunk->auth))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
- commands);
-
- /* Make sure that the ASCONF ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
hdr = (struct sctp_addiphdr *)chunk->skb->data;
serial = ntohl(hdr->serial);
@@ -3988,6 +4018,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
+ /* Make sure that the ADDIP chunk has a valid length. */
+ if (!sctp_chunk_length_valid(asconf_ack,
+ sizeof(struct sctp_addip_chunk)))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
+
/* ADD-IP, Section 4.1.2:
* This chunk MUST be sent in an authenticated way by using
* the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
@@ -3996,14 +4032,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
*/
if (!asoc->peer.asconf_capable ||
(!net->sctp.addip_noauth && !asconf_ack->auth))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
- commands);
-
- /* Make sure that the ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(asconf_ack,
- sizeof(struct sctp_addip_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data;
rcvd_serial = ntohl(addip_hdr->serial);
@@ -4575,6 +4604,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Make sure that the chunk has a valid length.
* Since we don't know the chunk type, we use a general
* chunkhdr structure to make a comparison.
@@ -4642,6 +4674,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (!sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Make sure that the chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -4863,9 +4898,6 @@ static enum sctp_disposition sctp_sf_violation_chunk(
{
static const char err_str[] = "The following chunk violates protocol:";
- if (!asoc)
- return sctp_sf_violation(net, ep, asoc, type, arg, commands);
-
return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
sizeof(err_str));
}
@@ -6348,6 +6380,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(
* yet.
*/
switch (chunk->chunk_hdr->type) {
+ case SCTP_CID_INIT:
case SCTP_CID_INIT_ACK:
{
struct sctp_initack_chunk *initack;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6b937bfd4751..33391254fa82 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9412,7 +9412,6 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
struct inet_sock *newinet;
struct sctp_sock *sp = sctp_sk(sk);
- struct sctp_endpoint *ep = sp->ep;
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
@@ -9457,9 +9456,9 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
net_enable_timestamp();
/* Set newsk security attributes from original sk and connection
- * security attribute from ep.
+ * security attribute from asoc.
*/
- security_sctp_sk_clone(ep, sk, newsk);
+ security_sctp_sk_clone(asoc, sk, newsk);
}
static inline void sctp_copy_descendant(struct sock *sk_to,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a3d3ca6dd63d..133f1719bf1b 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -269,7 +269,7 @@ bool sctp_transport_pl_send(struct sctp_transport *t)
if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
- t->pl.pmtu = SCTP_MIN_PLPMTU;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
sctp_assoc_sync_pmtu(t->asoc);
}
@@ -366,8 +366,9 @@ static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) {
t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
- t->pl.pmtu = SCTP_MIN_PLPMTU;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ return true;
}
} else if (t->pl.state == SCTP_PL_SEARCH) {
if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
@@ -378,11 +379,10 @@ static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
t->pl.probe_high = 0;
t->pl.pmtu = SCTP_BASE_PLPMTU;
t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ return true;
} else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) {
t->pl.probe_size = pmtu;
t->pl.probe_count = 0;
-
- return false;
}
} else if (t->pl.state == SCTP_PL_COMPLETE) {
if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
@@ -393,10 +393,11 @@ static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
t->pl.probe_high = 0;
t->pl.pmtu = SCTP_BASE_PLPMTU;
t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ return true;
}
}
- return true;
+ return false;
}
bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 99a0186cba5b..196fb6f01b14 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y += -I$(src)
obj-$(CONFIG_SMC) += smc.o
obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
+smc-y += smc_tracepoint.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index c038efc23ce3..59284da9116d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -50,6 +50,7 @@
#include "smc_rx.h"
#include "smc_close.h"
#include "smc_stats.h"
+#include "smc_tracepoint.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -148,14 +149,18 @@ static int __smc_release(struct smc_sock *smc)
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
} else {
- if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
- sock_put(sk); /* passive closing */
- if (sk->sk_state == SMC_LISTEN) {
- /* wake up clcsock accept */
- rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
+ if (sk->sk_state != SMC_CLOSED) {
+ if (sk->sk_state != SMC_LISTEN &&
+ sk->sk_state != SMC_INIT)
+ sock_put(sk); /* passive closing */
+ if (sk->sk_state == SMC_LISTEN) {
+ /* wake up clcsock accept */
+ rc = kernel_sock_shutdown(smc->clcsock,
+ SHUT_RDWR);
+ }
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
}
- sk->sk_state = SMC_CLOSED;
- sk->sk_state_change(sk);
smc_restore_fallback_changes(smc);
}
@@ -439,6 +444,47 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
return 0;
}
+static bool smc_isascii(char *hostname)
+{
+ int i;
+
+ for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
+ if (!isascii(hostname[i]))
+ return false;
+ return true;
+}
+
+static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
+ struct smc_clc_msg_accept_confirm *clc)
+{
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)clc;
+ struct smc_clc_first_contact_ext *fce;
+ int clc_v2_len;
+
+ if (clc->hdr.version == SMC_V1 ||
+ !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return;
+
+ if (smc->conn.lgr->is_smcd) {
+ memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
+ SMC_MAX_EID_LEN);
+ clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
+ d1);
+ } else {
+ memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
+ SMC_MAX_EID_LEN);
+ clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
+ r1);
+ }
+ fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
+ smc->conn.lgr->peer_os = fce->os_type;
+ smc->conn.lgr->peer_smc_release = fce->release;
+ if (smc_isascii(fce->hostname))
+ memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
+ SMC_MAX_HOSTNAME_LEN);
+}
+
static void smcr_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
@@ -451,16 +497,6 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc,
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}
-static bool smc_isascii(char *hostname)
-{
- int i;
-
- for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
- if (!isascii(hostname[i]))
- return false;
- return true;
-}
-
static void smcd_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
@@ -472,22 +508,6 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
- if (clc->hdr.version > SMC_V1 &&
- (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)clc;
- struct smc_clc_first_contact_ext *fce =
- (struct smc_clc_first_contact_ext *)
- (((u8 *)clc_v2) + sizeof(*clc_v2));
-
- memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
- SMC_MAX_EID_LEN);
- smc->conn.lgr->peer_os = fce->os_type;
- smc->conn.lgr->peer_smc_release = fce->release;
- if (smc_isascii(fce->hostname))
- memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
- SMC_MAX_HOSTNAME_LEN);
- }
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -497,14 +517,16 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
smcd_conn_save_peer_info(smc, clc);
else
smcr_conn_save_peer_info(smc, clc);
+ smc_conn_save_peer_info_fce(smc, clc);
}
static void smc_link_save_peer_info(struct smc_link *link,
- struct smc_clc_msg_accept_confirm *clc)
+ struct smc_clc_msg_accept_confirm *clc,
+ struct smc_init_info *ini)
{
link->peer_qpn = ntoh24(clc->r0.qpn);
- memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE);
- memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac));
+ memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
+ memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
link->peer_psn = ntoh24(clc->r0.psn);
link->peer_mtu = clc->r0.qp_mtu;
}
@@ -547,6 +569,7 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
+ trace_smc_switch_to_fallback(smc, reason_code);
if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
smc->clcsock->file = smc->sk.sk_socket->file;
smc->clcsock->file->private_data = smc->clcsock;
@@ -608,7 +631,9 @@ static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
* used for the internal TCP socket
*/
smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
- if (!ini->ib_dev)
+ if (!ini->check_smcrv2 && !ini->ib_dev)
+ return SMC_CLC_DECL_NOSMCRDEV;
+ if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
return SMC_CLC_DECL_NOSMCRDEV;
return 0;
}
@@ -692,27 +717,42 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
int rc = 0;
/* check if there is an ism device available */
- if (ini->smcd_version & SMC_V1) {
- if (smc_find_ism_device(smc, ini) ||
- smc_connect_ism_vlan_setup(smc, ini)) {
- if (ini->smc_type_v1 == SMC_TYPE_B)
- ini->smc_type_v1 = SMC_TYPE_R;
- else
- ini->smc_type_v1 = SMC_TYPE_N;
- } /* else ISM V1 is supported for this connection */
- if (smc_find_rdma_device(smc, ini)) {
- if (ini->smc_type_v1 == SMC_TYPE_B)
- ini->smc_type_v1 = SMC_TYPE_D;
- else
- ini->smc_type_v1 = SMC_TYPE_N;
- } /* else RDMA is supported for this connection */
- }
- if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini))
- ini->smc_type_v2 = SMC_TYPE_N;
+ if (!(ini->smcd_version & SMC_V1) ||
+ smc_find_ism_device(smc, ini) ||
+ smc_connect_ism_vlan_setup(smc, ini))
+ ini->smcd_version &= ~SMC_V1;
+ /* else ISM V1 is supported for this connection */
+
+ /* check if there is an rdma device available */
+ if (!(ini->smcr_version & SMC_V1) ||
+ smc_find_rdma_device(smc, ini))
+ ini->smcr_version &= ~SMC_V1;
+ /* else RDMA is supported for this connection */
+
+ ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
+ ini->smcr_version & SMC_V1);
+
+ /* check if there is an ism v2 device available */
+ if (!(ini->smcd_version & SMC_V2) ||
+ !smc_ism_is_v2_capable() ||
+ smc_find_ism_v2_device_clnt(smc, ini))
+ ini->smcd_version &= ~SMC_V2;
+
+ /* check if there is an rdma v2 device available */
+ ini->check_smcrv2 = true;
+ ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
+ if (!(ini->smcr_version & SMC_V2) ||
+ smc->clcsock->sk->sk_family != AF_INET ||
+ !smc_clc_ueid_count() ||
+ smc_find_rdma_device(smc, ini))
+ ini->smcr_version &= ~SMC_V2;
+ ini->check_smcrv2 = false;
+
+ ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
+ ini->smcr_version & SMC_V2);
/* if neither ISM nor RDMA are supported, fallback */
- if (!smcr_indicated(ini->smc_type_v1) &&
- ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
+ if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
rc = SMC_CLC_DECL_NOSMCDEV;
return rc;
@@ -752,6 +792,64 @@ static int smc_connect_clc(struct smc_sock *smc,
SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}
+void smc_fill_gid_list(struct smc_link_group *lgr,
+ struct smc_gidlist *gidlist,
+ struct smc_ib_device *known_dev, u8 *known_gid)
+{
+ struct smc_init_info *alt_ini = NULL;
+
+ memset(gidlist, 0, sizeof(*gidlist));
+ memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);
+
+ alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
+ if (!alt_ini)
+ goto out;
+
+ alt_ini->vlan_id = lgr->vlan_id;
+ alt_ini->check_smcrv2 = true;
+ alt_ini->smcrv2.saddr = lgr->saddr;
+ smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);
+
+ if (!alt_ini->smcrv2.ib_dev_v2)
+ goto out;
+
+ memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
+ SMC_GID_SIZE);
+
+out:
+ kfree(alt_ini);
+}
+
+static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
+ struct smc_clc_msg_accept_confirm *aclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)aclc;
+ struct smc_clc_first_contact_ext *fce =
+ (struct smc_clc_first_contact_ext *)
+ (((u8 *)clc_v2) + sizeof(*clc_v2));
+
+ if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
+ return 0;
+
+ if (fce->v2_direct) {
+ memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
+ ini->smcrv2.uses_gateway = false;
+ } else {
+ if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
+ smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
+ ini->smcrv2.nexthop_mac,
+ &ini->smcrv2.uses_gateway))
+ return SMC_CLC_DECL_NOROUTE;
+ if (!ini->smcrv2.uses_gateway) {
+ /* mismatch: peer claims indirect, but its direct */
+ return SMC_CLC_DECL_NOINDIRECT;
+ }
+ }
+ return 0;
+}
+
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
@@ -759,11 +857,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
{
int i, reason_code = 0;
struct smc_link *link;
+ u8 *eid = NULL;
ini->is_smcd = false;
- ini->ib_lcl = &aclc->r0.lcl;
ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
+ memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
+ memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
+ memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
+
+ reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
+ if (reason_code)
+ return reason_code;
mutex_lock(&smc_client_lgr_pending);
reason_code = smc_conn_create(smc, ini);
@@ -785,8 +890,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
!memcmp(l->peer_gid, &aclc->r0.lcl.gid,
SMC_GID_SIZE) &&
- !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
- sizeof(l->peer_mac))) {
+ (aclc->hdr.version > SMC_V1 ||
+ !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
+ sizeof(l->peer_mac)))) {
link = l;
break;
}
@@ -805,7 +911,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
}
if (ini->first_contact_local)
- smc_link_save_peer_info(link, aclc);
+ smc_link_save_peer_info(link, aclc, ini);
if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK;
@@ -828,8 +934,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
}
smc_rmb_sync_sg_for_device(&smc->conn);
+ if (aclc->hdr.version > SMC_V1) {
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)aclc;
+
+ eid = clc_v2->r1.eid;
+ if (ini->first_contact_local)
+ smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
+ link->smcibdev, link->gid);
+ }
+
reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
- SMC_V1);
+ aclc->hdr.version, eid, ini);
if (reason_code)
goto connect_abort;
@@ -869,7 +985,7 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
int i;
for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
- if (ini->ism_chid[i] == ntohs(aclc->chid)) {
+ if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
ini->ism_selected = i;
return 0;
}
@@ -883,6 +999,7 @@ static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
+ u8 *eid = NULL;
int rc = 0;
ini->is_smcd = true;
@@ -918,8 +1035,15 @@ static int smc_connect_ism(struct smc_sock *smc,
smc_rx_init(smc);
smc_tx_init(smc);
+ if (aclc->hdr.version > SMC_V1) {
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)aclc;
+
+ eid = clc_v2->d1.eid;
+ }
+
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
- aclc->hdr.version);
+ aclc->hdr.version, eid, NULL);
if (rc)
goto connect_abort;
mutex_unlock(&smc_server_lgr_pending);
@@ -942,17 +1066,24 @@ connect_abort:
static int smc_connect_check_aclc(struct smc_init_info *ini,
struct smc_clc_msg_accept_confirm *aclc)
{
- if ((aclc->hdr.typev1 == SMC_TYPE_R &&
- !smcr_indicated(ini->smc_type_v1)) ||
- (aclc->hdr.typev1 == SMC_TYPE_D &&
- ((!smcd_indicated(ini->smc_type_v1) &&
- !smcd_indicated(ini->smc_type_v2)) ||
- (aclc->hdr.version == SMC_V1 &&
- !smcd_indicated(ini->smc_type_v1)) ||
- (aclc->hdr.version == SMC_V2 &&
- !smcd_indicated(ini->smc_type_v2)))))
+ if (aclc->hdr.typev1 != SMC_TYPE_R &&
+ aclc->hdr.typev1 != SMC_TYPE_D)
return SMC_CLC_DECL_MODEUNSUPP;
+ if (aclc->hdr.version >= SMC_V2) {
+ if ((aclc->hdr.typev1 == SMC_TYPE_R &&
+ !smcr_indicated(ini->smc_type_v2)) ||
+ (aclc->hdr.typev1 == SMC_TYPE_D &&
+ !smcd_indicated(ini->smc_type_v2)))
+ return SMC_CLC_DECL_MODEUNSUPP;
+ } else {
+ if ((aclc->hdr.typev1 == SMC_TYPE_R &&
+ !smcr_indicated(ini->smc_type_v1)) ||
+ (aclc->hdr.typev1 == SMC_TYPE_D &&
+ !smcd_indicated(ini->smc_type_v1)))
+ return SMC_CLC_DECL_MODEUNSUPP;
+ }
+
return 0;
}
@@ -983,14 +1114,15 @@ static int __smc_connect(struct smc_sock *smc)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
version);
- ini->smcd_version = SMC_V1;
- ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0;
+ ini->smcd_version = SMC_V1 | SMC_V2;
+ ini->smcr_version = SMC_V1 | SMC_V2;
ini->smc_type_v1 = SMC_TYPE_B;
- ini->smc_type_v2 = smc_ism_is_v2_capable() ? SMC_TYPE_D : SMC_TYPE_N;
+ ini->smc_type_v2 = SMC_TYPE_B;
/* get vlan id from IP device */
if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
ini->smcd_version &= ~SMC_V1;
+ ini->smcr_version = 0;
ini->smc_type_v1 = SMC_TYPE_N;
if (!ini->smcd_version) {
rc = SMC_CLC_DECL_GETVLANERR;
@@ -1018,15 +1150,17 @@ static int __smc_connect(struct smc_sock *smc)
/* check if smc modes and versions of CLC proposal and accept match */
rc = smc_connect_check_aclc(ini, aclc);
version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
- ini->smcd_version = version;
if (rc)
goto vlan_cleanup;
/* depending on previous steps, connect using rdma or ism */
- if (aclc->hdr.typev1 == SMC_TYPE_R)
+ if (aclc->hdr.typev1 == SMC_TYPE_R) {
+ ini->smcr_version = version;
rc = smc_connect_rdma(smc, aclc, ini);
- else if (aclc->hdr.typev1 == SMC_TYPE_D)
+ } else if (aclc->hdr.typev1 == SMC_TYPE_D) {
+ ini->smcd_version = version;
rc = smc_connect_ism(smc, aclc, ini);
+ }
if (rc)
goto vlan_cleanup;
@@ -1057,7 +1191,7 @@ static void smc_connect_work(struct work_struct *work)
if (smc->clcsock->sk->sk_err) {
smc->sk.sk_err = smc->clcsock->sk->sk_err;
} else if ((1 << smc->clcsock->sk->sk_state) &
- (TCPF_SYN_SENT | TCP_SYN_RECV)) {
+ (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
if ((rc == -EPIPE) &&
((1 << smc->clcsock->sk->sk_state) &
@@ -1307,7 +1441,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
/* initial contact - try to establish second link */
- smc_llc_srv_add_link(link);
+ smc_llc_srv_add_link(link, NULL);
return 0;
}
@@ -1387,33 +1521,48 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
ini->smc_type_v1 = pclc->hdr.typev1;
ini->smc_type_v2 = pclc->hdr.typev2;
- ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0;
- if (pclc->hdr.version > SMC_V1)
- ini->smcd_version |=
- ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0;
- if (!(ini->smcd_version & SMC_V2)) {
+ ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
+ ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
+ if (pclc->hdr.version > SMC_V1) {
+ if (smcd_indicated(ini->smc_type_v2))
+ ini->smcd_version |= SMC_V2;
+ if (smcr_indicated(ini->smc_type_v2))
+ ini->smcr_version |= SMC_V2;
+ }
+ if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
rc = SMC_CLC_DECL_PEERNOSMC;
goto out;
}
- if (!smc_ism_is_v2_capable()) {
- ini->smcd_version &= ~SMC_V2;
- rc = SMC_CLC_DECL_NOISM2SUPP;
- goto out;
- }
pclc_v2_ext = smc_get_clc_v2_ext(pclc);
if (!pclc_v2_ext) {
ini->smcd_version &= ~SMC_V2;
+ ini->smcr_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOV2EXT;
goto out;
}
pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
- if (!pclc_smcd_v2_ext) {
- ini->smcd_version &= ~SMC_V2;
- rc = SMC_CLC_DECL_NOV2DEXT;
+ if (ini->smcd_version & SMC_V2) {
+ if (!smc_ism_is_v2_capable()) {
+ ini->smcd_version &= ~SMC_V2;
+ rc = SMC_CLC_DECL_NOISM2SUPP;
+ } else if (!pclc_smcd_v2_ext) {
+ ini->smcd_version &= ~SMC_V2;
+ rc = SMC_CLC_DECL_NOV2DEXT;
+ } else if (!pclc_v2_ext->hdr.eid_cnt &&
+ !pclc_v2_ext->hdr.flag.seid) {
+ ini->smcd_version &= ~SMC_V2;
+ rc = SMC_CLC_DECL_NOUEID;
+ }
+ }
+ if (ini->smcr_version & SMC_V2) {
+ if (!pclc_v2_ext->hdr.eid_cnt) {
+ ini->smcr_version &= ~SMC_V2;
+ rc = SMC_CLC_DECL_NOUEID;
+ }
}
out:
- if (!ini->smcd_version)
+ if (!ini->smcd_version && !ini->smcr_version)
return rc;
return 0;
@@ -1533,11 +1682,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
pclc_smcd = smc_get_clc_msg_smcd(pclc);
smc_v2_ext = smc_get_clc_v2_ext(pclc);
smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
- if (!smcd_v2_ext ||
- !smc_v2_ext->hdr.flag.seid) { /* no system EID support for SMCD */
- smc_find_ism_store_rc(SMC_CLC_DECL_NOSEID, ini);
- goto not_found;
- }
mutex_lock(&smcd_dev_list.mutex);
if (pclc_smcd->ism.chid)
@@ -1555,14 +1699,16 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
}
mutex_unlock(&smcd_dev_list.mutex);
- if (ini->ism_dev[0]) {
- smc_ism_get_system_eid(ini->ism_dev[0], &eid);
- if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN))
- goto not_found;
- } else {
+ if (!ini->ism_dev[0]) {
+ smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
goto not_found;
}
+ smc_ism_get_system_eid(&eid);
+ if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
+ smcd_v2_ext->system_eid, eid))
+ goto not_found;
+
/* separate - outside the smcd_dev_list.lock */
smcd_version = ini->smcd_version;
for (i = 0; i < matches; i++) {
@@ -1579,6 +1725,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
}
/* no V2 ISM device could be initialized */
ini->smcd_version = smcd_version; /* restore original value */
+ ini->negotiated_eid[0] = 0;
not_found:
ini->smcd_version &= ~SMC_V2;
@@ -1608,6 +1755,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
not_found:
smc_find_ism_store_rc(rc, ini);
+ ini->smcd_version &= ~SMC_V1;
ini->ism_dev[0] = NULL;
ini->is_smcd = false;
}
@@ -1626,24 +1774,69 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
return 0;
}
+static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
+ struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_v2_extension *smc_v2_ext;
+ u8 smcr_version;
+ int rc;
+
+ if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
+ goto not_found;
+
+ smc_v2_ext = smc_get_clc_v2_ext(pclc);
+ if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
+ goto not_found;
+
+ /* prepare RDMA check */
+ memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
+ memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
+ memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
+ ini->check_smcrv2 = true;
+ ini->smcrv2.clc_sk = new_smc->clcsock->sk;
+ ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
+ ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
+ rc = smc_find_rdma_device(new_smc, ini);
+ if (rc) {
+ smc_find_ism_store_rc(rc, ini);
+ goto not_found;
+ }
+ if (!ini->smcrv2.uses_gateway)
+ memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);
+
+ smcr_version = ini->smcr_version;
+ ini->smcr_version = SMC_V2;
+ rc = smc_listen_rdma_init(new_smc, ini);
+ if (!rc)
+ rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
+ if (!rc)
+ return;
+ ini->smcr_version = smcr_version;
+ smc_find_ism_store_rc(rc, ini);
+
+not_found:
+ ini->smcr_version &= ~SMC_V2;
+ ini->check_smcrv2 = false;
+}
+
static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
int rc;
- if (!smcr_indicated(ini->smc_type_v1))
+ if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
return SMC_CLC_DECL_NOSMCDEV;
/* prepare RDMA check */
- ini->ib_lcl = &pclc->lcl;
+ memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
+ memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
+ memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
rc = smc_find_rdma_device(new_smc, ini);
if (rc) {
/* no RDMA device found */
- if (ini->smc_type_v1 == SMC_TYPE_B)
- /* neither ISM nor RDMA device found */
- rc = SMC_CLC_DECL_NOSMCDEV;
- return rc;
+ return SMC_CLC_DECL_NOSMCDEV;
}
rc = smc_listen_rdma_init(new_smc, ini);
if (rc)
@@ -1656,51 +1849,60 @@ static int smc_listen_find_device(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
- int rc;
+ int prfx_rc;
/* check for ISM device matching V2 proposed device */
smc_find_ism_v2_device_serv(new_smc, pclc, ini);
if (ini->ism_dev[0])
return 0;
- if (!(ini->smcd_version & SMC_V1))
- return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV;
-
- /* check for matching IP prefix and subnet length */
- rc = smc_listen_prfx_check(new_smc, pclc);
- if (rc)
- return ini->rc ?: rc;
+ /* check for matching IP prefix and subnet length (V1) */
+ prfx_rc = smc_listen_prfx_check(new_smc, pclc);
+ if (prfx_rc)
+ smc_find_ism_store_rc(prfx_rc, ini);
/* get vlan id from IP device */
if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
/* check for ISM device matching V1 proposed device */
- smc_find_ism_v1_device_serv(new_smc, pclc, ini);
+ if (!prfx_rc)
+ smc_find_ism_v1_device_serv(new_smc, pclc, ini);
if (ini->ism_dev[0])
return 0;
- if (pclc->hdr.typev1 == SMC_TYPE_D)
+ if (!smcr_indicated(pclc->hdr.typev1) &&
+ !smcr_indicated(pclc->hdr.typev2))
/* skip RDMA and decline */
return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
- /* check if RDMA is available */
- rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
- smc_find_ism_store_rc(rc, ini);
+ /* check if RDMA V2 is available */
+ smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
+ if (ini->smcrv2.ib_dev_v2)
+ return 0;
- return (!rc) ? 0 : ini->rc;
+ /* check if RDMA V1 is available */
+ if (!prfx_rc) {
+ int rc;
+
+ rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
+ smc_find_ism_store_rc(rc, ini);
+ return (!rc) ? 0 : ini->rc;
+ }
+ return SMC_CLC_DECL_NOSMCDEV;
}
/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
- bool local_first)
+ bool local_first,
+ struct smc_init_info *ini)
{
struct smc_link *link = new_smc->conn.lnk;
int reason_code = 0;
if (local_first)
- smc_link_save_peer_info(link, cclc);
+ smc_link_save_peer_info(link, cclc, ini);
if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
return SMC_CLC_DECL_ERR_RTOK;
@@ -1721,12 +1923,13 @@ static void smc_listen_work(struct work_struct *work)
{
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
- u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm *cclc;
struct smc_clc_msg_proposal_area *buf;
struct smc_clc_msg_proposal *pclc;
struct smc_init_info *ini = NULL;
+ u8 proposal_version = SMC_V1;
+ u8 accept_version;
int rc = 0;
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
@@ -1757,7 +1960,9 @@ static void smc_listen_work(struct work_struct *work)
SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
if (rc)
goto out_decl;
- version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version;
+
+ if (pclc->hdr.version > SMC_V1)
+ proposal_version = SMC_V2;
/* IPSec connections opt out of SMC optimizations */
if (using_ipsec(new_smc)) {
@@ -1787,8 +1992,9 @@ static void smc_listen_work(struct work_struct *work)
goto out_unlock;
/* send SMC Accept CLC message */
+ accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
- ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1);
+ accept_version, ini->negotiated_eid);
if (rc)
goto out_unlock;
@@ -1810,7 +2016,7 @@ static void smc_listen_work(struct work_struct *work)
/* finish worker */
if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc,
- ini->first_contact_local);
+ ini->first_contact_local, ini);
if (rc)
goto out_unlock;
mutex_unlock(&smc_server_lgr_pending);
@@ -1824,7 +2030,7 @@ out_unlock:
mutex_unlock(&smc_server_lgr_pending);
out_decl:
smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
- version);
+ proposal_version);
out_free:
kfree(ini);
kfree(buf);
@@ -2662,6 +2868,7 @@ static void __exit smc_exit(void)
proto_unregister(&smc_proto);
smc_pnet_exit();
smc_nl_exit();
+ smc_clc_exit();
unregister_pernet_subsys(&smc_net_stat_ops);
unregister_pernet_subsys(&smc_net_ops);
rcu_barrier();
diff --git a/net/smc/smc.h b/net/smc/smc.h
index d65e15f0c944..f4286ca1f228 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -29,9 +29,6 @@
* devices
*/
-#define SMC_MAX_HOSTNAME_LEN 32
-#define SMC_MAX_EID_LEN 32
-
extern struct proto smc_proto;
extern struct proto smc_proto6;
@@ -59,7 +56,20 @@ enum smc_state { /* possible states of an SMC socket */
struct smc_link_group;
struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */
- u8 type;
+ union {
+ u8 type;
+#if defined(__BIG_ENDIAN_BITFIELD)
+ struct {
+ u8 llc_version:4,
+ llc_type:4;
+ };
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ struct {
+ u8 llc_type:4,
+ llc_version:4;
+ };
+#endif
+ };
} __aligned(1);
struct smc_cdc_conn_state_flags {
@@ -289,7 +299,12 @@ static inline bool using_ipsec(struct smc_sock *smc)
}
#endif
+struct smc_gidlist;
+
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk);
+void smc_fill_gid_list(struct smc_link_group *lgr,
+ struct smc_gidlist *gidlist,
+ struct smc_ib_device *known_dev, u8 *known_gid);
#endif /* __SMC_H */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index f23f558054a7..99acd337ba90 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -150,9 +150,11 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
again:
link = conn->lnk;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
if (rc)
- return rc;
+ goto put_out;
spin_lock_bh(&conn->send_lock);
if (link != conn->lnk) {
@@ -160,6 +162,7 @@ again:
spin_unlock_bh(&conn->send_lock);
smc_wr_tx_put_slot(link,
(struct smc_wr_tx_pend_priv *)pend);
+ smc_wr_tx_link_put(link);
if (again)
return -ENOLINK;
again = true;
@@ -167,6 +170,8 @@ again:
}
rc = smc_cdc_msg_send(conn, wr_buf, pend);
spin_unlock_bh(&conn->send_lock);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index e286dafd6e88..8409ab71a5e4 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -26,10 +26,12 @@
#include "smc_clc.h"
#include "smc_ib.h"
#include "smc_ism.h"
+#include "smc_netlink.h"
#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
+#define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108
#define SMC_CLC_RECV_BUF_LEN 100
/* eye catcher "SMCR" EBCDIC for CLC messages */
@@ -39,6 +41,297 @@ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN];
+struct smc_clc_eid_table {
+ rwlock_t lock;
+ struct list_head list;
+ u8 ueid_cnt;
+ u8 seid_enabled;
+};
+
+static struct smc_clc_eid_table smc_clc_eid_table;
+
+struct smc_clc_eid_entry {
+ struct list_head list;
+ u8 eid[SMC_MAX_EID_LEN];
+};
+
+/* The size of a user EID is 32 characters.
+ * Valid characters should be (single-byte character set) A-Z, 0-9, '.' and '-'.
+ * Blanks should only be used to pad to the expected size.
+ * First character must be alphanumeric.
+ */
+static bool smc_clc_ueid_valid(char *ueid)
+{
+ char *end = ueid + SMC_MAX_EID_LEN;
+
+ while (--end >= ueid && isspace(*end))
+ ;
+ if (end < ueid)
+ return false;
+ if (!isalnum(*ueid) || islower(*ueid))
+ return false;
+ while (ueid <= end) {
+ if ((!isalnum(*ueid) || islower(*ueid)) && *ueid != '.' &&
+ *ueid != '-')
+ return false;
+ ueid++;
+ }
+ return true;
+}
+
+static int smc_clc_ueid_add(char *ueid)
+{
+ struct smc_clc_eid_entry *new_ueid, *tmp_ueid;
+ int rc;
+
+ if (!smc_clc_ueid_valid(ueid))
+ return -EINVAL;
+
+ /* add a new ueid entry to the ueid table if there isn't one */
+ new_ueid = kzalloc(sizeof(*new_ueid), GFP_KERNEL);
+ if (!new_ueid)
+ return -ENOMEM;
+ memcpy(new_ueid->eid, ueid, SMC_MAX_EID_LEN);
+
+ write_lock(&smc_clc_eid_table.lock);
+ if (smc_clc_eid_table.ueid_cnt >= SMC_MAX_UEID) {
+ rc = -ERANGE;
+ goto err_out;
+ }
+ list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) {
+ if (!memcmp(tmp_ueid->eid, ueid, SMC_MAX_EID_LEN)) {
+ rc = -EEXIST;
+ goto err_out;
+ }
+ }
+ list_add_tail(&new_ueid->list, &smc_clc_eid_table.list);
+ smc_clc_eid_table.ueid_cnt++;
+ write_unlock(&smc_clc_eid_table.lock);
+ return 0;
+
+err_out:
+ write_unlock(&smc_clc_eid_table.lock);
+ kfree(new_ueid);
+ return rc;
+}
+
+int smc_clc_ueid_count(void)
+{
+ int count;
+
+ read_lock(&smc_clc_eid_table.lock);
+ count = smc_clc_eid_table.ueid_cnt;
+ read_unlock(&smc_clc_eid_table.lock);
+
+ return count;
+}
+
+int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
+ char *ueid;
+
+ if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
+ return -EINVAL;
+ ueid = (char *)nla_data(nla_ueid);
+
+ return smc_clc_ueid_add(ueid);
+}
+
+/* remove one or all ueid entries from the table */
+static int smc_clc_ueid_remove(char *ueid)
+{
+ struct smc_clc_eid_entry *lst_ueid, *tmp_ueid;
+ int rc = -ENOENT;
+
+ /* remove table entry */
+ write_lock(&smc_clc_eid_table.lock);
+ list_for_each_entry_safe(lst_ueid, tmp_ueid, &smc_clc_eid_table.list,
+ list) {
+ if (!ueid || !memcmp(lst_ueid->eid, ueid, SMC_MAX_EID_LEN)) {
+ list_del(&lst_ueid->list);
+ smc_clc_eid_table.ueid_cnt--;
+ kfree(lst_ueid);
+ rc = 0;
+ }
+ }
+ if (!rc && !smc_clc_eid_table.ueid_cnt) {
+ smc_clc_eid_table.seid_enabled = 1;
+ rc = -EAGAIN; /* indicate success and enabling of seid */
+ }
+ write_unlock(&smc_clc_eid_table.lock);
+ return rc;
+}
+
+int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
+ char *ueid;
+
+ if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
+ return -EINVAL;
+ ueid = (char *)nla_data(nla_ueid);
+
+ return smc_clc_ueid_remove(ueid);
+}
+
+int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info)
+{
+ smc_clc_ueid_remove(NULL);
+ return 0;
+}
+
+static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq,
+ u32 flags, char *ueid)
+{
+ char ueid_str[SMC_MAX_EID_LEN + 1];
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &smc_gen_nl_family,
+ flags, SMC_NETLINK_DUMP_UEID);
+ if (!hdr)
+ return -ENOMEM;
+ snprintf(ueid_str, sizeof(ueid_str), "%s", ueid);
+ if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) {
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+ }
+ genlmsg_end(skb, hdr);
+ return 0;
+}
+
+static int _smc_nl_ueid_dump(struct sk_buff *skb, u32 portid, u32 seq,
+ int start_idx)
+{
+ struct smc_clc_eid_entry *lst_ueid;
+ int idx = 0;
+
+ read_lock(&smc_clc_eid_table.lock);
+ list_for_each_entry(lst_ueid, &smc_clc_eid_table.list, list) {
+ if (idx++ < start_idx)
+ continue;
+ if (smc_nl_ueid_dumpinfo(skb, portid, seq, NLM_F_MULTI,
+ lst_ueid->eid)) {
+ --idx;
+ break;
+ }
+ }
+ read_unlock(&smc_clc_eid_table.lock);
+ return idx;
+}
+
+int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+ int idx;
+
+ idx = _smc_nl_ueid_dump(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, cb_ctx->pos[0]);
+
+ cb_ctx->pos[0] = idx;
+ return skb->len;
+}
+
+int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+ char seid_str[SMC_MAX_EID_LEN + 1];
+ u8 seid_enabled;
+ void *hdr;
+ u8 *seid;
+
+ if (cb_ctx->pos[0])
+ return skb->len;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &smc_gen_nl_family, NLM_F_MULTI,
+ SMC_NETLINK_DUMP_SEID);
+ if (!hdr)
+ return -ENOMEM;
+ if (!smc_ism_is_v2_capable())
+ goto end;
+
+ smc_ism_get_system_eid(&seid);
+ snprintf(seid_str, sizeof(seid_str), "%s", seid);
+ if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str))
+ goto err;
+ read_lock(&smc_clc_eid_table.lock);
+ seid_enabled = smc_clc_eid_table.seid_enabled;
+ read_unlock(&smc_clc_eid_table.lock);
+ if (nla_put_u8(skb, SMC_NLA_SEID_ENABLED, seid_enabled))
+ goto err;
+end:
+ genlmsg_end(skb, hdr);
+ cb_ctx->pos[0]++;
+ return skb->len;
+err:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info)
+{
+ write_lock(&smc_clc_eid_table.lock);
+ smc_clc_eid_table.seid_enabled = 1;
+ write_unlock(&smc_clc_eid_table.lock);
+ return 0;
+}
+
+int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info)
+{
+ int rc = 0;
+
+ write_lock(&smc_clc_eid_table.lock);
+ if (!smc_clc_eid_table.ueid_cnt)
+ rc = -ENOENT;
+ else
+ smc_clc_eid_table.seid_enabled = 0;
+ write_unlock(&smc_clc_eid_table.lock);
+ return rc;
+}
+
+static bool _smc_clc_match_ueid(u8 *peer_ueid)
+{
+ struct smc_clc_eid_entry *tmp_ueid;
+
+ list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) {
+ if (!memcmp(tmp_ueid->eid, peer_ueid, SMC_MAX_EID_LEN))
+ return true;
+ }
+ return false;
+}
+
+bool smc_clc_match_eid(u8 *negotiated_eid,
+ struct smc_clc_v2_extension *smc_v2_ext,
+ u8 *peer_eid, u8 *local_eid)
+{
+ bool match = false;
+ int i;
+
+ negotiated_eid[0] = 0;
+ read_lock(&smc_clc_eid_table.lock);
+ if (peer_eid && local_eid &&
+ smc_clc_eid_table.seid_enabled &&
+ smc_v2_ext->hdr.flag.seid &&
+ !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) {
+ memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN);
+ match = true;
+ goto out;
+ }
+
+ for (i = 0; i < smc_v2_ext->hdr.eid_cnt; i++) {
+ if (_smc_clc_match_ueid(smc_v2_ext->user_eids[i])) {
+ memcpy(negotiated_eid, smc_v2_ext->user_eids[i],
+ SMC_MAX_EID_LEN);
+ match = true;
+ goto out;
+ }
+ }
+out:
+ read_unlock(&smc_clc_eid_table.lock);
+ return match;
+}
+
/* check arriving CLC proposal */
static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
{
@@ -100,6 +393,27 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
(ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
sizeof(struct smc_clc_first_contact_ext)))
return false;
+ if (hdr->typev1 == SMC_TYPE_R &&
+ ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
+ return false;
+ }
+ return true;
+}
+
+/* check arriving CLC decline */
+static bool
+smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
+{
+ struct smc_clc_msg_hdr *hdr = &dclc->hdr;
+
+ if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
+ return false;
+ if (hdr->version == SMC_V1) {
+ if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline))
+ return false;
+ } else {
+ if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2))
+ return false;
}
return true;
}
@@ -145,9 +459,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
break;
case SMC_CLC_DECLINE:
dclc = (struct smc_clc_msg_decline *)clcm;
- if (ntohs(dclc->hdr.length) != sizeof(*dclc))
+ if (!smc_clc_msg_decl_valid(dclc))
return false;
- trl = &dclc->trl;
+ check_trl = false;
break;
default:
return false;
@@ -230,7 +544,8 @@ static int smc_clc_prfx_set(struct socket *clcsock,
goto out_rel;
}
/* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
+ goto out_rel;
/* analyze IP specific data of net_device belonging to TCP socket */
addr6 = (struct sockaddr_in6 *)&addrs;
rcu_read_lock();
@@ -445,15 +760,16 @@ out:
/* send CLC DECLINE message across internal TCP socket */
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
{
- struct smc_clc_msg_decline dclc;
+ struct smc_clc_msg_decline *dclc_v1;
+ struct smc_clc_msg_decline_v2 dclc;
struct msghdr msg;
+ int len, send_len;
struct kvec vec;
- int len;
+ dclc_v1 = (struct smc_clc_msg_decline *)&dclc;
memset(&dclc, 0, sizeof(dclc));
memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
dclc.hdr.type = SMC_CLC_DECLINE;
- dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = version;
dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
@@ -463,14 +779,22 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
memcpy(dclc.id_for_peer, local_systemid,
sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info);
- memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+ if (version == SMC_V1) {
+ memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER,
+ sizeof(SMC_EYECATCHER));
+ send_len = sizeof(*dclc_v1);
+ } else {
+ memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER,
+ sizeof(SMC_EYECATCHER));
+ send_len = sizeof(dclc);
+ }
+ dclc.hdr.length = htons(send_len);
memset(&msg, 0, sizeof(msg));
vec.iov_base = &dclc;
- vec.iov_len = sizeof(struct smc_clc_msg_decline);
- len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
- sizeof(struct smc_clc_msg_decline));
- if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
+ vec.iov_len = send_len;
+ len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len);
+ if (len < 0 || len < send_len)
len = -EPROTO;
return len > 0 ? 0 : len;
}
@@ -550,9 +874,10 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
if (ini->smc_type_v2 == SMC_TYPE_N) {
pclc_smcd->v2_ext_offset = 0;
} else {
+ struct smc_clc_eid_entry *ueident;
u16 v2_ext_offset;
- u8 *eid = NULL;
+ v2_ext->hdr.flag.release = SMC_RELEASE;
v2_ext_offset = sizeof(*pclc_smcd) -
offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
if (ini->smc_type_v1 != SMC_TYPE_N)
@@ -560,21 +885,31 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
pclc_prfx->ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]);
pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
- v2_ext->hdr.eid_cnt = 0;
+ plen += sizeof(*v2_ext);
+
+ read_lock(&smc_clc_eid_table.lock);
+ v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt;
+ plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN;
+ i = 0;
+ list_for_each_entry(ueident, &smc_clc_eid_table.list, list) {
+ memcpy(v2_ext->user_eids[i++], ueident->eid,
+ sizeof(ueident->eid));
+ }
+ read_unlock(&smc_clc_eid_table.lock);
+ }
+ if (smcd_indicated(ini->smc_type_v2)) {
+ u8 *eid = NULL;
+
+ v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
- v2_ext->hdr.flag.release = SMC_RELEASE;
- v2_ext->hdr.flag.seid = 1;
v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
offsetofend(struct smc_clnt_opts_area_hdr,
smcd_v2_ext_offset) +
v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
- if (ini->ism_dev[0])
- smc_ism_get_system_eid(ini->ism_dev[0], &eid);
- else
- smc_ism_get_system_eid(ini->ism_dev[1], &eid);
- if (eid)
+ smc_ism_get_system_eid(&eid);
+ if (eid && v2_ext->hdr.flag.seid)
memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
- plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext);
+ plen += sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) {
for (i = 1; i <= ini->ism_offered_cnt; i++) {
gidchids[i - 1].gid =
@@ -586,6 +921,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
sizeof(struct smc_clc_smcd_gid_chid);
}
}
+ if (smcr_indicated(ini->smc_type_v2))
+ memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
+
pclc_base->hdr.length = htons(plen);
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -607,13 +945,16 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
}
if (ini->smc_type_v2 != SMC_TYPE_N) {
vec[i].iov_base = v2_ext;
- vec[i++].iov_len = sizeof(*v2_ext);
- vec[i].iov_base = smcd_v2_ext;
- vec[i++].iov_len = sizeof(*smcd_v2_ext);
- if (ini->ism_offered_cnt) {
- vec[i].iov_base = gidchids;
- vec[i++].iov_len = ini->ism_offered_cnt *
+ vec[i++].iov_len = sizeof(*v2_ext) +
+ (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
+ if (smcd_indicated(ini->smc_type_v2)) {
+ vec[i].iov_base = smcd_v2_ext;
+ vec[i++].iov_len = sizeof(*smcd_v2_ext);
+ if (ini->ism_offered_cnt) {
+ vec[i].iov_base = gidchids;
+ vec[i++].iov_len = ini->ism_offered_cnt *
sizeof(struct smc_clc_smcd_gid_chid);
+ }
}
}
vec[i].iov_base = trl;
@@ -635,13 +976,15 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
/* build and send CLC CONFIRM / ACCEPT message */
static int smc_clc_send_confirm_accept(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2,
- int first_contact, u8 version)
+ int first_contact, u8 version,
+ u8 *eid, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_first_contact_ext fce;
+ struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl;
- struct kvec vec[3];
+ struct kvec vec[5];
struct msghdr msg;
int i, len;
@@ -663,12 +1006,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (version == SMC_V1) {
clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
} else {
- u8 *eid = NULL;
-
- clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd));
- smc_ism_get_system_eid(conn->lgr->smcd, &eid);
- if (eid)
- memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN);
+ clc_v2->d1.chid =
+ htons(smc_ism_get_chid(conn->lgr->smcd));
+ if (eid && eid[0])
+ memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact)
smc_clc_fill_fce(&fce, &len);
@@ -707,6 +1048,26 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(clc->r0.psn, link->psn_initial);
+ if (version == SMC_V1) {
+ clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
+ } else {
+ if (eid && eid[0])
+ memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
+ len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
+ if (first_contact) {
+ smc_clc_fill_fce(&fce, &len);
+ fce.v2_direct = !link->lgr->uses_gateway;
+ memset(&gle, 0, sizeof(gle));
+ if (ini && clc->hdr.type == SMC_CLC_CONFIRM) {
+ gle.gid_cnt = ini->smcrv2.gidlist.len;
+ len += sizeof(gle);
+ len += gle.gid_cnt * sizeof(gle.gid[0]);
+ } else {
+ len += sizeof(gle.reserved);
+ }
+ }
+ clc_v2->hdr.length = htons(len);
+ }
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
}
@@ -714,7 +1075,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
i = 0;
vec[i].iov_base = clc_v2;
if (version > SMC_V1)
- vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl);
+ vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
+ SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 :
+ SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) -
+ sizeof(trl);
else
vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
SMCD_CLC_ACCEPT_CONFIRM_LEN :
@@ -723,6 +1087,18 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (version > SMC_V1 && first_contact) {
vec[i].iov_base = &fce;
vec[i++].iov_len = sizeof(fce);
+ if (!conn->lgr->is_smcd) {
+ if (clc->hdr.type == SMC_CLC_CONFIRM) {
+ vec[i].iov_base = &gle;
+ vec[i++].iov_len = sizeof(gle);
+ vec[i].iov_base = &ini->smcrv2.gidlist.list;
+ vec[i++].iov_len = gle.gid_cnt *
+ sizeof(gle.gid[0]);
+ } else {
+ vec[i].iov_base = &gle.reserved;
+ vec[i++].iov_len = sizeof(gle.reserved);
+ }
+ }
}
vec[i].iov_base = &trl;
vec[i++].iov_len = sizeof(trl);
@@ -732,7 +1108,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
/* send CLC CONFIRM message across internal TCP socket */
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
- u8 version)
+ u8 version, u8 *eid, struct smc_init_info *ini)
{
struct smc_clc_msg_accept_confirm_v2 cclc_v2;
int reason_code = 0;
@@ -742,7 +1118,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
memset(&cclc_v2, 0, sizeof(cclc_v2));
cclc_v2.hdr.type = SMC_CLC_CONFIRM;
len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
- version);
+ version, eid, ini);
if (len < ntohs(cclc_v2.hdr.length)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
@@ -757,7 +1133,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
/* send CLC ACCEPT message across internal TCP socket */
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
- u8 version)
+ u8 version, u8 *negotiated_eid)
{
struct smc_clc_msg_accept_confirm_v2 aclc_v2;
int len;
@@ -765,7 +1141,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
memset(&aclc_v2, 0, sizeof(aclc_v2));
aclc_v2.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
- version);
+ version, negotiated_eid, NULL);
if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
@@ -785,4 +1161,14 @@ void __init smc_clc_init(void)
u = utsname();
memcpy(smc_hostname, u->nodename,
min_t(size_t, strlen(u->nodename), sizeof(smc_hostname)));
+
+ INIT_LIST_HEAD(&smc_clc_eid_table.list);
+ rwlock_init(&smc_clc_eid_table.lock);
+ smc_clc_eid_table.ueid_cnt = 0;
+ smc_clc_eid_table.seid_enabled = 1;
+}
+
+void smc_clc_exit(void)
+{
+ smc_clc_ueid_remove(NULL);
}
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 32d37f7b70f2..83f02f131fc0 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -14,8 +14,10 @@
#define _SMC_CLC_H
#include <rdma/ib_verbs.h>
+#include <linux/smc.h>
#include "smc.h"
+#include "smc_netlink.h"
#define SMC_CLC_PROPOSAL 0x01
#define SMC_CLC_ACCEPT 0x02
@@ -42,6 +44,7 @@
#define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. */
#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */
#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */
+#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
@@ -52,6 +55,8 @@
#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */
#define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */
+#define SMC_CLC_DECL_NOROUTE 0x030e0000 /* SMC-Rv2 conn. no route to peer */
+#define SMC_CLC_DECL_NOINDIRECT 0x030f0000 /* SMC-Rv2 conn. indirect mismatch*/
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
@@ -158,6 +163,7 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
} __aligned(4);
#define SMC_CLC_MAX_V6_PREFIX 8
+#define SMC_CLC_MAX_UEID 8
struct smc_clc_msg_proposal_area {
struct smc_clc_msg_proposal pclc_base;
@@ -165,6 +171,7 @@ struct smc_clc_msg_proposal_area {
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_v2_extension pclc_v2_ext;
+ u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN];
struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext;
struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS];
struct smc_clc_msg_trail pclc_trl;
@@ -209,11 +216,14 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */
#define SMC_CLC_OS_AIX 3
struct smc_clc_first_contact_ext {
- u8 reserved1;
#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 v2_direct : 1,
+ reserved : 7;
u8 os_type : 4,
release : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 7,
+ v2_direct : 1;
u8 release : 4,
os_type : 4;
#endif
@@ -221,6 +231,13 @@ struct smc_clc_first_contact_ext {
u8 hostname[SMC_MAX_HOSTNAME_LEN];
};
+struct smc_clc_fce_gid_ext {
+ u8 reserved[16];
+ u8 gid_cnt;
+ u8 reserved2[3];
+ u8 gid[][SMC_GID_SIZE];
+};
+
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
@@ -235,13 +252,17 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
- struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
+ struct { /* SMC-R */
+ struct smcr_clc_msg_accept_confirm r0;
+ u8 eid[SMC_MAX_EID_LEN];
+ u8 reserved6[8];
+ } r1;
struct { /* SMC-D */
struct smcd_clc_msg_accept_confirm_common d0;
__be16 chid;
u8 eid[SMC_MAX_EID_LEN];
u8 reserved5[8];
- };
+ } d1;
};
};
@@ -260,6 +281,24 @@ struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);
+#define SMC_DECL_DIAG_COUNT_V2 4 /* no. of additional peer diagnosis codes */
+
+struct smc_clc_msg_decline_v2 { /* clc decline message */
+ struct smc_clc_msg_hdr hdr;
+ u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */
+ __be32 peer_diagnosis; /* diagnosis information */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 os_type : 4,
+ reserved : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 4,
+ os_type : 4;
+#endif
+ u8 reserved2[3];
+ __be32 peer_diagnosis_v2[SMC_DECL_DIAG_COUNT_V2];
+ struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
+} __aligned(4);
+
/* determine start of the prefix area within the proposal message */
static inline struct smc_clc_msg_proposal_prefix *
smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
@@ -278,6 +317,17 @@ static inline bool smcd_indicated(int smc_type)
return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B;
}
+static inline u8 smc_indicated_type(int is_smcd, int is_smcr)
+{
+ if (is_smcd && is_smcr)
+ return SMC_TYPE_B;
+ if (is_smcd)
+ return SMC_TYPE_D;
+ if (is_smcr)
+ return SMC_TYPE_R;
+ return SMC_TYPE_N;
+}
+
/* get SMC-D info from proposal message */
static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
@@ -330,10 +380,22 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
- u8 version);
+ u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
- u8 version);
+ u8 version, u8 *negotiated_eid);
void smc_clc_init(void) __init;
+void smc_clc_exit(void);
void smc_clc_get_hostname(u8 **host);
+bool smc_clc_match_eid(u8 *negotiated_eid,
+ struct smc_clc_v2_extension *smc_v2_ext,
+ u8 *peer_eid, u8 *local_eid);
+int smc_clc_ueid_count(void);
+int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb);
+int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info);
+int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info);
+int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info);
+int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb);
+int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info);
+int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info);
#endif
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index af227b65669e..49b8ba3bb683 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -34,6 +34,7 @@
#include "smc_ism.h"
#include "smc_netlink.h"
#include "smc_stats.h"
+#include "smc_tracepoint.h"
#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
@@ -223,7 +224,6 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
char hostname[SMC_MAX_HOSTNAME_LEN + 1];
char smc_seid[SMC_MAX_EID_LEN + 1];
- struct smcd_dev *smcd_dev;
struct nlattr *attrs;
u8 *seid = NULL;
u8 *host = NULL;
@@ -245,6 +245,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
goto errattr;
if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
goto errattr;
+ if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
+ goto errattr;
smc_clc_get_hostname(&host);
if (host) {
memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
@@ -252,13 +254,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
goto errattr;
}
- mutex_lock(&smcd_dev_list.mutex);
- smcd_dev = list_first_entry_or_null(&smcd_dev_list.list,
- struct smcd_dev, list);
- if (smcd_dev)
- smc_ism_get_system_eid(smcd_dev, &seid);
- mutex_unlock(&smcd_dev_list.mutex);
- if (seid && smc_ism_is_v2_capable()) {
+ if (smc_ism_is_v2_capable()) {
+ smc_ism_get_system_eid(&seid);
memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
smc_seid[SMC_MAX_EID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
@@ -277,12 +274,65 @@ errmsg:
return skb->len;
}
+/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
+static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nlattr *v2_attrs)
+{
+ char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
+ char smc_eid[SMC_MAX_EID_LEN + 1];
+
+ if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
+ goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
+ goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
+ goto errv2attr;
+ memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
+ smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
+ if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
+ goto errv2attr;
+ memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
+ smc_eid[SMC_MAX_EID_LEN] = 0;
+ if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
+ goto errv2attr;
+
+ nla_nest_end(skb, v2_attrs);
+ return 0;
+
+errv2attr:
+ nla_nest_cancel(skb, v2_attrs);
+ return -EMSGSIZE;
+}
+
+static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *v2_attrs;
+
+ v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
+ if (!v2_attrs)
+ goto errattr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
+ goto errv2attr;
+
+ nla_nest_end(skb, v2_attrs);
+ return 0;
+
+errv2attr:
+ nla_nest_cancel(skb, v2_attrs);
+errattr:
+ return -EMSGSIZE;
+}
+
static int smc_nl_fill_lgr(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb)
{
char smc_target[SMC_MAX_PNETID_LEN + 1];
- struct nlattr *attrs;
+ struct nlattr *attrs, *v2_attrs;
attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
if (!attrs)
@@ -302,6 +352,15 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr;
+ if (lgr->smc_version > SMC_V1) {
+ v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
+ if (!v2_attrs)
+ goto errattr;
+ if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
+ goto errattr;
+ if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
+ goto errattr;
+ }
nla_nest_end(skb, attrs);
return 0;
@@ -434,10 +493,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb)
{
- char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
char smc_pnet[SMC_MAX_PNETID_LEN + 1];
- char smc_eid[SMC_MAX_EID_LEN + 1];
- struct nlattr *v2_attrs;
struct nlattr *attrs;
void *nlh;
@@ -469,32 +525,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
goto errattr;
+ if (lgr->smc_version > SMC_V1) {
+ struct nlattr *v2_attrs;
- v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2);
- if (!v2_attrs)
- goto errattr;
- if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
- goto errv2attr;
- if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
- goto errv2attr;
- if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
- goto errv2attr;
- memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
- smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
- if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
- goto errv2attr;
- memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
- smc_eid[SMC_MAX_EID_LEN] = 0;
- if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
- goto errv2attr;
-
- nla_nest_end(skb, v2_attrs);
+ v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
+ if (!v2_attrs)
+ goto errattr;
+ if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
+ goto errattr;
+ }
nla_nest_end(skb, attrs);
genlmsg_end(skb, nlh);
return 0;
-errv2attr:
- nla_nest_cancel(skb, v2_attrs);
errattr:
nla_nest_cancel(skb, attrs);
errout:
@@ -690,24 +733,30 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link)
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini)
{
+ struct smc_ib_device *smcibdev;
u8 rndvec[3];
int rc;
- get_device(&ini->ib_dev->ibdev->dev);
- atomic_inc(&ini->ib_dev->lnk_cnt);
+ if (lgr->smc_version == SMC_V2) {
+ lnk->smcibdev = ini->smcrv2.ib_dev_v2;
+ lnk->ibport = ini->smcrv2.ib_port_v2;
+ } else {
+ lnk->smcibdev = ini->ib_dev;
+ lnk->ibport = ini->ib_port;
+ }
+ get_device(&lnk->smcibdev->ibdev->dev);
+ atomic_inc(&lnk->smcibdev->lnk_cnt);
+ lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
lnk->link_idx = link_idx;
- lnk->smcibdev = ini->ib_dev;
- lnk->ibport = ini->ib_port;
smc_ibdev_cnt_inc(lnk);
smcr_copy_dev_info_to_link(lnk);
- lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
atomic_set(&lnk->conn_cnt, 0);
smc_llc_link_set_uid(lnk);
INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
- if (!ini->ib_dev->initialized) {
- rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
+ if (!lnk->smcibdev->initialized) {
+ rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
if (rc)
goto out;
}
@@ -715,7 +764,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
- ini->vlan_id, lnk->gid, &lnk->sgid_index);
+ ini->vlan_id, lnk->gid, &lnk->sgid_index,
+ lgr->smc_version == SMC_V2 ?
+ &ini->smcrv2 : NULL);
if (rc)
goto out;
rc = smc_llc_link_init(lnk);
@@ -746,11 +797,12 @@ clear_llc_lnk:
smc_llc_link_clear(lnk, false);
out:
smc_ibdev_cnt_dec(lnk);
- put_device(&ini->ib_dev->ibdev->dev);
+ put_device(&lnk->smcibdev->ibdev->dev);
+ smcibdev = lnk->smcibdev;
memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED;
- if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
- wake_up(&ini->ib_dev->lnks_deleted);
+ if (!atomic_dec_return(&smcibdev->lnk_cnt))
+ wake_up(&smcibdev->lnks_deleted);
return rc;
}
@@ -814,18 +866,37 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
} else {
/* SMC-R specific settings */
+ struct smc_ib_device *ibdev;
+ int ibport;
+
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
+ lgr->smc_version = ini->smcr_version;
+ memcpy(lgr->peer_systemid, ini->peer_systemid,
SMC_SYSTEMID_LEN);
- memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
+ if (lgr->smc_version == SMC_V2) {
+ ibdev = ini->smcrv2.ib_dev_v2;
+ ibport = ini->smcrv2.ib_port_v2;
+ lgr->saddr = ini->smcrv2.saddr;
+ lgr->uses_gateway = ini->smcrv2.uses_gateway;
+ memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
+ ETH_ALEN);
+ } else {
+ ibdev = ini->ib_dev;
+ ibport = ini->ib_port;
+ }
+ memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN);
+ if (smc_wr_alloc_lgr_mem(lgr))
+ goto free_wq;
smc_llc_lgr_init(lgr, smc);
link_idx = SMC_SINGLE_LINK;
lnk = &lgr->lnk[link_idx];
rc = smcr_link_init(lgr, lnk, link_idx, ini);
- if (rc)
+ if (rc) {
+ smc_wr_free_lgr_mem(lgr);
goto free_wq;
+ }
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
@@ -949,7 +1020,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
to_lnk = &lgr->lnk[i];
break;
}
- if (!to_lnk) {
+ if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
smc_lgr_terminate_sched(lgr);
return NULL;
}
@@ -981,24 +1052,26 @@ again:
read_unlock_bh(&lgr->conns_lock);
/* pre-fetch buffer outside of send_lock, might sleep */
rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
- if (rc) {
- smcr_link_down_cond_sched(to_lnk);
- return NULL;
- }
+ if (rc)
+ goto err_out;
/* avoid race with smcr_tx_sndbuf_nonempty() */
spin_lock_bh(&conn->send_lock);
smc_switch_link_and_count(conn, to_lnk);
rc = smc_switch_cursor(smc, pend, wr_buf);
spin_unlock_bh(&conn->send_lock);
sock_put(&smc->sk);
- if (rc) {
- smcr_link_down_cond_sched(to_lnk);
- return NULL;
- }
+ if (rc)
+ goto err_out;
goto again;
}
read_unlock_bh(&lgr->conns_lock);
+ smc_wr_tx_link_put(to_lnk);
return to_lnk;
+
+err_out:
+ smcr_link_down_cond_sched(to_lnk);
+ smc_wr_tx_link_put(to_lnk);
+ return NULL;
}
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
@@ -1230,6 +1303,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
+ smc_wr_free_lgr_mem(lgr);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
@@ -1474,7 +1548,9 @@ static void smc_conn_abort_work(struct work_struct *work)
abort_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ lock_sock(&smc->sk);
smc_conn_kill(conn, true);
+ release_sock(&smc->sk);
sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}
@@ -1545,15 +1621,19 @@ static void smcr_link_down(struct smc_link *lnk)
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
- if (smc_link_downing(&lnk->state))
+ if (smc_link_downing(&lnk->state)) {
+ trace_smcr_link_down(lnk, __builtin_return_address(0));
smcr_link_down(lnk);
+ }
}
/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
- if (smc_link_downing(&lnk->state))
+ if (smc_link_downing(&lnk->state)) {
+ trace_smcr_link_down(lnk, __builtin_return_address(0));
schedule_work(&lnk->link_down_wrk);
+ }
}
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
@@ -1638,13 +1718,15 @@ out:
return rc;
}
-static bool smcr_lgr_match(struct smc_link_group *lgr,
- struct smc_clc_msg_local *lcl,
+static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
+ u8 peer_systemid[],
+ u8 peer_gid[],
+ u8 peer_mac_v1[],
enum smc_lgr_role role, u32 clcqpn)
{
int i;
- if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
+ if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
lgr->role != role)
return false;
@@ -1652,8 +1734,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
if (!smc_link_active(&lgr->lnk[i]))
continue;
if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
- !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
- !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
+ !memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) &&
+ (smcr_version == SMC_V2 ||
+ !memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN)))
return true;
}
return false;
@@ -1692,7 +1775,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
ini->ism_peer_gid[ini->ism_selected]) :
- smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
+ smcr_lgr_match(lgr, ini->smcr_version,
+ ini->peer_systemid,
+ ini->peer_gid, ini->peer_mac, role,
+ ini->ib_clcqpn)) &&
!lgr->sync_err &&
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c043ecdca5c4..59cef3b830d8 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -42,11 +42,16 @@ enum smc_link_state { /* possible states of a link */
};
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
+#define SMC_WR_BUF_V2_SIZE 8192 /* size of v2 work request buffer */
struct smc_wr_buf {
u8 raw[SMC_WR_BUF_SIZE];
};
+struct smc_wr_v2_buf {
+ u8 raw[SMC_WR_BUF_V2_SIZE];
+};
+
#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */
enum smc_wr_reg_state {
@@ -92,7 +97,11 @@ struct smc_link {
struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
struct completion *wr_tx_compl; /* WR send CQE completion */
/* above four vectors have wr_tx_cnt elements and use the same index */
+ struct ib_send_wr *wr_tx_v2_ib; /* WR send v2 meta data */
+ struct ib_sge *wr_tx_v2_sge; /* WR send v2 gather meta data*/
+ struct smc_wr_tx_pend *wr_tx_v2_pend; /* WR send v2 waiting for CQE */
dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
+ dma_addr_t wr_tx_v2_dma_addr; /* DMA address of v2 tx buf*/
atomic_long_t wr_tx_id; /* seq # of last sent WR */
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
@@ -104,6 +113,7 @@ struct smc_link {
struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */
/* above three vectors have wr_rx_cnt elements and use the same index */
dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
+ dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/
u64 wr_rx_id; /* seq # of last recv WR */
u32 wr_rx_cnt; /* number of WR recv buffers */
unsigned long wr_rx_tstamp; /* jiffies when last buf rx */
@@ -208,6 +218,7 @@ enum smc_llc_flowtype {
SMC_LLC_FLOW_NONE = 0,
SMC_LLC_FLOW_ADD_LINK = 2,
SMC_LLC_FLOW_DEL_LINK = 4,
+ SMC_LLC_FLOW_REQ_ADD_LINK = 5,
SMC_LLC_FLOW_RKEY = 6,
};
@@ -250,6 +261,10 @@ struct smc_link_group {
/* client or server */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX];
/* smc link */
+ struct smc_wr_v2_buf *wr_rx_buf_v2;
+ /* WR v2 recv payload buffer */
+ struct smc_wr_v2_buf *wr_tx_buf_v2;
+ /* WR v2 send payload buffer */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
@@ -288,6 +303,9 @@ struct smc_link_group {
/* link keep alive time */
u32 llc_termination_rsn;
/* rsn code for termination */
+ u8 nexthop_mac[ETH_ALEN];
+ u8 uses_gateway;
+ __be32 saddr;
};
struct { /* SMC-D */
u64 peer_gid;
@@ -302,6 +320,31 @@ struct smc_link_group {
struct smc_clc_msg_local;
+#define GID_LIST_SIZE 2
+
+struct smc_gidlist {
+ u8 len;
+ u8 list[GID_LIST_SIZE][SMC_GID_SIZE];
+};
+
+struct smc_init_info_smcrv2 {
+ /* Input fields */
+ __be32 saddr;
+ struct sock *clc_sk;
+ __be32 daddr;
+
+ /* Output fields when saddr is set */
+ struct smc_ib_device *ib_dev_v2;
+ u8 ib_port_v2;
+ u8 ib_gid_v2[SMC_GID_SIZE];
+
+ /* Additional output fields when clc_sk and daddr is set as well */
+ u8 uses_gateway;
+ u8 nexthop_mac[ETH_ALEN];
+
+ struct smc_gidlist gidlist;
+};
+
struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
@@ -310,12 +353,18 @@ struct smc_init_info {
u8 first_contact_local;
unsigned short vlan_id;
u32 rc;
+ u8 negotiated_eid[SMC_MAX_EID_LEN];
/* SMC-R */
- struct smc_clc_msg_local *ib_lcl;
+ u8 smcr_version;
+ u8 check_smcrv2;
+ u8 peer_gid[SMC_GID_SIZE];
+ u8 peer_mac[ETH_ALEN];
+ u8 peer_systemid[SMC_SYSTEMID_LEN];
struct smc_ib_device *ib_dev;
u8 ib_gid[SMC_GID_SIZE];
u8 ib_port;
u32 ib_clcqpn;
+ struct smc_init_info_smcrv2 smcrv2;
/* SMC-D */
u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index a8845343d183..d93055ec17ae 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -17,6 +17,7 @@
#include <linux/scatterlist.h>
#include <linux/wait.h>
#include <linux/mutex.h>
+#include <linux/inetdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -62,16 +63,23 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
struct ib_qp_attr qp_attr;
+ u8 hop_lim = 1;
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IB_QPS_RTR;
qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
- rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);
+ if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
+ hop_lim = IPV6_DEFAULT_HOPLIMIT;
+ rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0);
rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
- memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
- sizeof(lnk->peer_mac));
+ if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
+ memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac,
+ sizeof(lnk->lgr->nexthop_mac));
+ else
+ memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
+ sizeof(lnk->peer_mac));
qp_attr.dest_qp_num = lnk->peer_qpn;
qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */
qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming
@@ -183,9 +191,81 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
+int smc_ib_find_route(__be32 saddr, __be32 daddr,
+ u8 nexthop_mac[], u8 *uses_gateway)
+{
+ struct neighbour *neigh = NULL;
+ struct rtable *rt = NULL;
+ struct flowi4 fl4 = {
+ .saddr = saddr,
+ .daddr = daddr
+ };
+
+ if (daddr == cpu_to_be32(INADDR_NONE))
+ goto out;
+ rt = ip_route_output_flow(&init_net, &fl4, NULL);
+ if (IS_ERR(rt))
+ goto out;
+ if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
+ goto out;
+ neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr);
+ if (neigh) {
+ memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
+ *uses_gateway = rt->rt_uses_gateway;
+ return 0;
+ }
+out:
+ return -ENOENT;
+}
+
+static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
+ const struct ib_gid_attr *attr,
+ u8 gid[], u8 *sgid_index,
+ struct smc_init_info_smcrv2 *smcrv2)
+{
+ if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
+ if (gid)
+ memcpy(gid, &attr->gid, SMC_GID_SIZE);
+ if (sgid_index)
+ *sgid_index = attr->index;
+ return 0;
+ }
+ if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
+ struct in_device *in_dev = __in_dev_get_rcu(ndev);
+ const struct in_ifaddr *ifa;
+ bool subnet_match = false;
+
+ if (!in_dev)
+ goto out;
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (!inet_ifa_match(smcrv2->saddr, ifa))
+ continue;
+ subnet_match = true;
+ break;
+ }
+ if (!subnet_match)
+ goto out;
+ if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
+ smcrv2->daddr,
+ smcrv2->nexthop_mac,
+ &smcrv2->uses_gateway))
+ goto out;
+
+ if (gid)
+ memcpy(gid, &attr->gid, SMC_GID_SIZE);
+ if (sgid_index)
+ *sgid_index = attr->index;
+ return 0;
+ }
+out:
+ return -ENODEV;
+}
+
/* determine the gid for an ib-device port and vlan id */
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
- unsigned short vlan_id, u8 gid[], u8 *sgid_index)
+ unsigned short vlan_id, u8 gid[], u8 *sgid_index,
+ struct smc_init_info_smcrv2 *smcrv2)
{
const struct ib_gid_attr *attr;
const struct net_device *ndev;
@@ -201,15 +281,13 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
if (!IS_ERR(ndev) &&
((!vlan_id && !is_vlan_dev(ndev)) ||
(vlan_id && is_vlan_dev(ndev) &&
- vlan_dev_vlan_id(ndev) == vlan_id)) &&
- attr->gid_type == IB_GID_TYPE_ROCE) {
- rcu_read_unlock();
- if (gid)
- memcpy(gid, &attr->gid, SMC_GID_SIZE);
- if (sgid_index)
- *sgid_index = attr->index;
- rdma_put_gid_attr(attr);
- return 0;
+ vlan_dev_vlan_id(ndev) == vlan_id))) {
+ if (!smc_ib_determine_gid_rcu(ndev, attr, gid,
+ sgid_index, smcrv2)) {
+ rcu_read_unlock();
+ rdma_put_gid_attr(attr);
+ return 0;
+ }
}
rcu_read_unlock();
rdma_put_gid_attr(attr);
@@ -217,6 +295,58 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
return -ENODEV;
}
+/* check if gid is still defined on smcibdev */
+static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2,
+ struct smc_ib_device *smcibdev, u8 ibport)
+{
+ const struct ib_gid_attr *attr;
+ bool rc = false;
+ int i;
+
+ for (i = 0; !rc && i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
+ attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i);
+ if (IS_ERR(attr))
+ continue;
+
+ rcu_read_lock();
+ if ((!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) ||
+ (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ !(ipv6_addr_type((const struct in6_addr *)&attr->gid)
+ & IPV6_ADDR_LINKLOCAL)))
+ if (!memcmp(gid, &attr->gid, SMC_GID_SIZE))
+ rc = true;
+ rcu_read_unlock();
+ rdma_put_gid_attr(attr);
+ }
+ return rc;
+}
+
+/* check all links if the gid is still defined on smcibdev */
+static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr;
+ int i;
+
+ spin_lock_bh(&smc_lgr_list.lock);
+ list_for_each_entry(lgr, &smc_lgr_list.list, list) {
+ if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+ SMC_MAX_PNETID_LEN))
+ continue; /* lgr is not affected */
+ if (list_empty(&lgr->list))
+ continue;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state == SMC_LNK_UNUSED ||
+ lgr->lnk[i].smcibdev != smcibdev)
+ continue;
+ if (!smc_ib_check_link_gid(lgr->lnk[i].gid,
+ lgr->smc_version == SMC_V2,
+ smcibdev, ibport))
+ smcr_port_err(smcibdev, ibport);
+ }
+ }
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
int rc;
@@ -255,6 +385,7 @@ static void smc_ib_port_event_work(struct work_struct *work)
} else {
clear_bit(port_idx, smcibdev->ports_going_away);
smcr_port_add(smcibdev, port_idx + 1);
+ smc_ib_gid_check(smcibdev, port_idx + 1);
}
}
}
@@ -523,6 +654,7 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk)
/* create a queue pair within the protection domain for a link */
int smc_ib_create_queue_pair(struct smc_link *lnk)
{
+ int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
struct ib_qp_init_attr qp_attr = {
.event_handler = smc_ib_qp_event_handler,
.qp_context = lnk,
@@ -536,7 +668,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_send_wr = SMC_WR_BUF_CNT * 3,
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
- .max_recv_sge = 1,
+ .max_recv_sge = sges_per_buf,
},
.sq_sig_type = IB_SIGNAL_REQ_WR,
.qp_type = IB_QPT_RC,
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 3085f5180da7..07585937370e 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -59,6 +59,17 @@ struct smc_ib_device { /* ib-device infos for smc */
int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */
};
+static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
+{
+ struct in6_addr *addr6 = (struct in6_addr *)gid;
+
+ if (ipv6_addr_v4mapped(addr6) ||
+ !(addr6->s6_addr32[0] | addr6->s6_addr32[1] | addr6->s6_addr32[2]))
+ return addr6->s6_addr32[3];
+ return cpu_to_be32(INADDR_NONE);
+}
+
+struct smc_init_info_smcrv2;
struct smc_buf_desc;
struct smc_link;
@@ -90,7 +101,10 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
- unsigned short vlan_id, u8 gid[], u8 *sgid_index);
+ unsigned short vlan_id, u8 gid[], u8 *sgid_index,
+ struct smc_init_info_smcrv2 *smcrv2);
+int smc_ib_find_route(__be32 saddr, __be32 daddr,
+ u8 nexthop_mac[], u8 *uses_gateway);
bool smc_ib_is_valid_local_systemid(void);
int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
#endif
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 9cb2df289963..fd28cc498b98 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -23,6 +23,7 @@ struct smcd_dev_list smcd_dev_list = {
};
static bool smc_ism_v2_capable;
+static u8 smc_ism_v2_system_eid[SMC_MAX_EID_LEN];
/* Test if an ISM communication is possible - same CPC */
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
@@ -42,9 +43,12 @@ int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
return rc < 0 ? rc : 0;
}
-void smc_ism_get_system_eid(struct smcd_dev *smcd, u8 **eid)
+void smc_ism_get_system_eid(u8 **eid)
{
- smcd->ops->get_system_eid(smcd, eid);
+ if (!smc_ism_v2_capable)
+ *eid = NULL;
+ else
+ *eid = smc_ism_v2_system_eid;
}
u16 smc_ism_get_chid(struct smcd_dev *smcd)
@@ -435,9 +439,12 @@ int smcd_register_dev(struct smcd_dev *smcd)
if (list_empty(&smcd_dev_list.list)) {
u8 *system_eid = NULL;
- smc_ism_get_system_eid(smcd, &system_eid);
- if (system_eid[24] != '0' || system_eid[28] != '0')
+ smcd->ops->get_system_eid(smcd, &system_eid);
+ if (system_eid[24] != '0' || system_eid[28] != '0') {
smc_ism_v2_capable = true;
+ memcpy(smc_ism_v2_system_eid, system_eid,
+ SMC_MAX_EID_LEN);
+ }
}
/* sort list: devices without pnetid before devices with pnetid */
if (smcd->pnetid[0])
@@ -533,4 +540,5 @@ EXPORT_SYMBOL_GPL(smcd_handle_irq);
void __init smc_ism_init(void)
{
smc_ism_v2_capable = false;
+ memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN);
}
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 113efc7352ed..004b22a13ffa 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -48,7 +48,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
int smc_ism_signal_shutdown(struct smc_link_group *lgr);
-void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid);
+void smc_ism_get_system_eid(u8 **eid);
u16 smc_ism_get_chid(struct smcd_dev *dev);
bool smc_ism_is_v2_capable(void);
void smc_ism_init(void);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 2e7560eba981..b102680296b8 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -23,16 +23,24 @@
struct smc_llc_hdr {
struct smc_wr_rx_hdr common;
- u8 length; /* 44 */
-#if defined(__BIG_ENDIAN_BITFIELD)
- u8 reserved:4,
- add_link_rej_rsn:4;
+ union {
+ struct {
+ u8 length; /* 44 */
+ #if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ add_link_rej_rsn:4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u8 add_link_rej_rsn:4,
- reserved:4;
+ u8 add_link_rej_rsn:4,
+ reserved:4;
#endif
+ };
+ u16 length_v2; /* 44 - 8192*/
+ };
u8 flags;
-};
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2
+ * (https://www.ibm.com/support/pages/node/6326337)
+ */
#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
@@ -76,6 +84,32 @@ struct smc_llc_msg_add_link_cont_rt {
__be64 rmb_vaddr_new;
};
+struct smc_llc_msg_add_link_v2_ext {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 v2_direct : 1,
+ reserved : 7;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 7,
+ v2_direct : 1;
+#endif
+ u8 reserved2;
+ u8 client_target_gid[SMC_GID_SIZE];
+ u8 reserved3[8];
+ u16 num_rkeys;
+ struct smc_llc_msg_add_link_cont_rt rt[];
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2
+ * (https://www.ibm.com/support/pages/node/6326337)
+ */
+
+struct smc_llc_msg_req_add_link_v2 {
+ struct smc_llc_hdr hd;
+ u8 reserved[20];
+ u8 gid_cnt;
+ u8 reserved2[3];
+ u8 gid[][SMC_GID_SIZE];
+};
+
#define SMC_LLC_RKEYS_PER_CONT_MSG 2
struct smc_llc_msg_add_link_cont { /* type 0x03 */
@@ -114,7 +148,8 @@ struct smc_rmb_rtoken {
__be64 rmb_vaddr;
} __packed; /* format defined in RFC7609 */
-#define SMC_LLC_RKEYS_PER_MSG 3
+#define SMC_LLC_RKEYS_PER_MSG 3
+#define SMC_LLC_RKEYS_PER_MSG_V2 255
struct smc_llc_msg_confirm_rkey { /* type 0x06 */
struct smc_llc_hdr hd;
@@ -135,9 +170,18 @@ struct smc_llc_msg_delete_rkey { /* type 0x09 */
u8 reserved2[4];
};
+struct smc_llc_msg_delete_rkey_v2 { /* type 0x29 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ u8 num_inval_rkeys;
+ u8 reserved[2];
+ __be32 rkey[];
+};
+
union smc_llc_msg {
struct smc_llc_msg_confirm_link confirm_link;
struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_req_add_link_v2 req_add_link;
struct smc_llc_msg_add_link_cont add_link_cont;
struct smc_llc_msg_del_link delete_link;
@@ -189,7 +233,7 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
struct smc_llc_qentry *qentry)
{
- u8 msg_type = qentry->msg.raw.hdr.common.type;
+ u8 msg_type = qentry->msg.raw.hdr.common.llc_type;
if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) &&
flow_type != msg_type && !lgr->delayed_event) {
@@ -219,7 +263,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
spin_unlock_bh(&lgr->llc_flow_lock);
return false;
}
- switch (qentry->msg.raw.hdr.common.type) {
+ switch (qentry->msg.raw.hdr.common.llc_type) {
case SMC_LLC_ADD_LINK:
flow->type = SMC_LLC_FLOW_ADD_LINK;
break;
@@ -306,7 +350,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
smc_llc_flow_qentry_del(flow);
goto out;
}
- rcv_msg = flow->qentry->msg.raw.hdr.common.type;
+ rcv_msg = flow->qentry->msg.raw.hdr.common.llc_type;
if (exp_msg && rcv_msg != exp_msg) {
if (exp_msg == SMC_LLC_ADD_LINK &&
rcv_msg == SMC_LLC_DELETE_LINK) {
@@ -374,6 +418,30 @@ static int smc_llc_add_pending_send(struct smc_link *link,
return 0;
}
+static int smc_llc_add_pending_send_v2(struct smc_link *link,
+ struct smc_wr_v2_buf **wr_buf,
+ struct smc_wr_tx_pend_priv **pend)
+{
+ int rc;
+
+ rc = smc_wr_tx_get_v2_slot(link, smc_llc_tx_handler, wr_buf, pend);
+ if (rc < 0)
+ return rc;
+ return 0;
+}
+
+static void smc_llc_init_msg_hdr(struct smc_llc_hdr *hdr,
+ struct smc_link_group *lgr, size_t len)
+{
+ if (lgr->smc_version == SMC_V2) {
+ hdr->common.llc_version = SMC_V2;
+ hdr->length_v2 = len;
+ } else {
+ hdr->common.llc_version = 0;
+ hdr->length = len;
+ }
+}
+
/* high-level API to send LLC confirm link */
int smc_llc_send_confirm_link(struct smc_link *link,
enum smc_llc_reqresp reqresp)
@@ -383,13 +451,15 @@ int smc_llc_send_confirm_link(struct smc_link *link,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
memset(confllc, 0, sizeof(*confllc));
- confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
- confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ confllc->hd.common.llc_type = SMC_LLC_CONFIRM_LINK;
+ smc_llc_init_msg_hdr(&confllc->hd, link->lgr, sizeof(*confllc));
confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
@@ -402,6 +472,8 @@ int smc_llc_send_confirm_link(struct smc_link *link,
confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -415,13 +487,15 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
struct smc_link *link;
int i, rc, rtok_ix;
+ if (!smc_wr_tx_link_hold(send_link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
- rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
- rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
+ rkeyllc->hd.common.llc_type = SMC_LLC_CONFIRM_RKEY;
+ smc_llc_init_msg_hdr(&rkeyllc->hd, send_link->lgr, sizeof(*rkeyllc));
rtok_ix = 1;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
@@ -444,6 +518,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
(u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
/* send llc message */
rc = smc_wr_tx_send(send_link, pend);
+put_out:
+ smc_wr_tx_link_put(send_link);
return rc;
}
@@ -456,38 +532,134 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
- rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
- rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
+ rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY;
+ smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
rkeyllc->num_rkeys = 1;
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
+/* return first buffer from any of the next buf lists */
+static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
+ int *buf_lst)
+{
+ struct smc_buf_desc *buf_pos;
+
+ while (*buf_lst < SMC_RMBE_SIZES) {
+ buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
+ struct smc_buf_desc, list);
+ if (buf_pos)
+ return buf_pos;
+ (*buf_lst)++;
+ }
+ return NULL;
+}
+
+/* return next rmb from buffer lists */
+static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
+ int *buf_lst,
+ struct smc_buf_desc *buf_pos)
+{
+ struct smc_buf_desc *buf_next;
+
+ if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
+ (*buf_lst)++;
+ return _smc_llc_get_next_rmb(lgr, buf_lst);
+ }
+ buf_next = list_next_entry(buf_pos, list);
+ return buf_next;
+}
+
+static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
+ int *buf_lst)
+{
+ *buf_lst = 0;
+ return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
+}
+
+static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
+ struct smc_link *link, struct smc_link *link_new)
+{
+ struct smc_link_group *lgr = link->lgr;
+ struct smc_buf_desc *buf_pos;
+ int prim_lnk_idx, lnk_idx, i;
+ struct smc_buf_desc *rmb;
+ int len = sizeof(*ext);
+ int buf_lst;
+
+ ext->v2_direct = !lgr->uses_gateway;
+ memcpy(ext->client_target_gid, link_new->gid, SMC_GID_SIZE);
+
+ prim_lnk_idx = link->link_idx;
+ lnk_idx = link_new->link_idx;
+ mutex_lock(&lgr->rmbs_lock);
+ ext->num_rkeys = lgr->conns_num;
+ if (!ext->num_rkeys)
+ goto out;
+ buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
+ for (i = 0; i < ext->num_rkeys; i++) {
+ if (!buf_pos)
+ break;
+ rmb = buf_pos;
+ ext->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
+ ext->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
+ ext->rt[i].rmb_vaddr_new =
+ cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
+ buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
+ while (buf_pos && !(buf_pos)->used)
+ buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
+ }
+ len += i * sizeof(ext->rt[0]);
+out:
+ mutex_unlock(&lgr->rmbs_lock);
+ return len;
+}
+
/* send ADD LINK request or response */
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
struct smc_link *link_new,
enum smc_llc_reqresp reqresp)
{
+ struct smc_llc_msg_add_link_v2_ext *ext = NULL;
struct smc_llc_msg_add_link *addllc;
struct smc_wr_tx_pend_priv *pend;
- struct smc_wr_buf *wr_buf;
+ int len = sizeof(*addllc);
int rc;
- rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
- if (rc)
- return rc;
- addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
+ if (link->lgr->smc_version == SMC_V2) {
+ struct smc_wr_v2_buf *wr_buf;
+
+ rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
+ if (rc)
+ goto put_out;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ ext = (struct smc_llc_msg_add_link_v2_ext *)
+ &wr_buf->raw[sizeof(*addllc)];
+ memset(ext, 0, SMC_WR_TX_SIZE);
+ } else {
+ struct smc_wr_buf *wr_buf;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ goto put_out;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ }
memset(addllc, 0, sizeof(*addllc));
- addllc->hd.common.type = SMC_LLC_ADD_LINK;
- addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ addllc->hd.common.llc_type = SMC_LLC_ADD_LINK;
if (reqresp == SMC_LLC_RESP)
addllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(addllc->sender_mac, mac, ETH_ALEN);
@@ -502,8 +674,16 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
addllc->qp_mtu = min(link_new->path_mtu,
link_new->peer_mtu);
}
+ if (ext && link_new)
+ len += smc_llc_fill_ext_v2(ext, link, link_new);
+ smc_llc_init_msg_hdr(&addllc->hd, link->lgr, len);
/* send llc message */
- rc = smc_wr_tx_send(link, pend);
+ if (link->lgr->smc_version == SMC_V2)
+ rc = smc_wr_tx_v2_send(link, pend, len);
+ else
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -517,14 +697,16 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
delllc = (struct smc_llc_msg_del_link *)wr_buf;
memset(delllc, 0, sizeof(*delllc));
- delllc->hd.common.type = SMC_LLC_DELETE_LINK;
- delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
+ delllc->hd.common.llc_type = SMC_LLC_DELETE_LINK;
+ smc_llc_init_msg_hdr(&delllc->hd, link->lgr, sizeof(*delllc));
if (reqresp == SMC_LLC_RESP)
delllc->hd.flags |= SMC_LLC_FLAG_RESP;
if (orderly)
@@ -536,6 +718,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
delllc->reason = htonl(reason);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -547,16 +731,20 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
testllc = (struct smc_llc_msg_test_link *)wr_buf;
memset(testllc, 0, sizeof(*testllc));
- testllc->hd.common.type = SMC_LLC_TEST_LINK;
- testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+ testllc->hd.common.llc_type = SMC_LLC_TEST_LINK;
+ smc_llc_init_msg_hdr(&testllc->hd, link->lgr, sizeof(*testllc));
memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -567,13 +755,16 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
struct smc_wr_buf *wr_buf;
int rc;
- if (!smc_link_usable(link))
+ if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
- return smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
/* schedule an llc send on link, may wait for buffers,
@@ -586,13 +777,16 @@ static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
struct smc_wr_buf *wr_buf;
int rc;
- if (!smc_link_usable(link))
+ if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
- return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+ rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
/********************************* receive ***********************************/
@@ -621,44 +815,6 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
return -EMLINK;
}
-/* return first buffer from any of the next buf lists */
-static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
- int *buf_lst)
-{
- struct smc_buf_desc *buf_pos;
-
- while (*buf_lst < SMC_RMBE_SIZES) {
- buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
- struct smc_buf_desc, list);
- if (buf_pos)
- return buf_pos;
- (*buf_lst)++;
- }
- return NULL;
-}
-
-/* return next rmb from buffer lists */
-static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
- int *buf_lst,
- struct smc_buf_desc *buf_pos)
-{
- struct smc_buf_desc *buf_next;
-
- if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
- (*buf_lst)++;
- return _smc_llc_get_next_rmb(lgr, buf_lst);
- }
- buf_next = list_next_entry(buf_pos, list);
- return buf_next;
-}
-
-static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
- int *buf_lst)
-{
- *buf_lst = 0;
- return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
-}
-
/* send one add_link_continue msg */
static int smc_llc_add_link_cont(struct smc_link *link,
struct smc_link *link_new, u8 *num_rkeys_todo,
@@ -672,9 +828,11 @@ static int smc_llc_add_link_cont(struct smc_link *link,
struct smc_buf_desc *rmb;
u8 n;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
memset(addc_llc, 0, sizeof(*addc_llc));
@@ -702,11 +860,14 @@ static int smc_llc_add_link_cont(struct smc_link *link,
while (*buf_pos && !(*buf_pos)->used)
*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
}
- addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT;
+ addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT;
addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
if (lgr->role == SMC_CLNT)
addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
- return smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
static int smc_llc_cli_rkey_exchange(struct smc_link *link,
@@ -758,6 +919,8 @@ static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
+ sizeof(qentry->msg));
return smc_llc_send_message(qentry->link, &qentry->msg);
}
@@ -778,7 +941,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
SMC_LLC_DEL_LOST_PATH);
return -ENOLINK;
}
- if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
+ if (qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
/* received DELETE_LINK instead */
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, &qentry->msg);
@@ -819,6 +982,26 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
return 0;
}
+static void smc_llc_save_add_link_rkeys(struct smc_link *link,
+ struct smc_link *link_new)
+{
+ struct smc_llc_msg_add_link_v2_ext *ext;
+ struct smc_link_group *lgr = link->lgr;
+ int max, i;
+
+ ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
+ SMC_WR_TX_SIZE);
+ max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
+ mutex_lock(&lgr->rmbs_lock);
+ for (i = 0; i < max; i++) {
+ smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
+ ext->rt[i].rmb_key,
+ ext->rt[i].rmb_vaddr_new,
+ ext->rt[i].rmb_key_new);
+ }
+ mutex_unlock(&lgr->rmbs_lock);
+}
+
static void smc_llc_save_add_link_info(struct smc_link *link,
struct smc_llc_msg_add_link *add_llc)
{
@@ -835,31 +1018,47 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
struct smc_link_group *lgr = smc_get_lgr(link);
+ struct smc_init_info *ini = NULL;
struct smc_link *lnk_new = NULL;
- struct smc_init_info ini;
int lnk_idx, rc = 0;
if (!llc->qp_mtu)
goto out_reject;
- ini.vlan_id = lgr->vlan_id;
- smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
+ if (!ini) {
+ rc = -ENOMEM;
+ goto out_reject;
+ }
+
+ ini->vlan_id = lgr->vlan_id;
+ if (lgr->smc_version == SMC_V2) {
+ ini->check_smcrv2 = true;
+ ini->smcrv2.saddr = lgr->saddr;
+ ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
+ }
+ smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
- !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) {
- if (!ini.ib_dev)
+ (lgr->smc_version == SMC_V2 ||
+ !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN))) {
+ if (!ini->ib_dev && !ini->smcrv2.ib_dev_v2)
goto out_reject;
lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
}
- if (!ini.ib_dev) {
+ if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
- ini.ib_dev = link->smcibdev;
- ini.ib_port = link->ibport;
+ ini->smcrv2.ib_dev_v2 = link->smcibdev;
+ ini->smcrv2.ib_port_v2 = link->ibport;
+ } else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
+ lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
+ ini->ib_dev = link->smcibdev;
+ ini->ib_port = link->ibport;
}
lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
if (lnk_idx < 0)
goto out_reject;
lnk_new = &lgr->lnk[lnk_idx];
- rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini);
+ rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini);
if (rc)
goto out_reject;
smc_llc_save_add_link_info(lnk_new, llc);
@@ -875,16 +1074,20 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
goto out_clear_lnk;
rc = smc_llc_send_add_link(link,
- lnk_new->smcibdev->mac[ini.ib_port - 1],
+ lnk_new->smcibdev->mac[lnk_new->ibport - 1],
lnk_new->gid, lnk_new, SMC_LLC_RESP);
if (rc)
goto out_clear_lnk;
- rc = smc_llc_cli_rkey_exchange(link, lnk_new);
- if (rc) {
- rc = 0;
- goto out_clear_lnk;
+ if (lgr->smc_version == SMC_V2) {
+ smc_llc_save_add_link_rkeys(link, lnk_new);
+ } else {
+ rc = smc_llc_cli_rkey_exchange(link, lnk_new);
+ if (rc) {
+ rc = 0;
+ goto out_clear_lnk;
+ }
}
- rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t);
+ rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t);
if (!rc)
goto out;
out_clear_lnk:
@@ -893,29 +1096,78 @@ out_clear_lnk:
out_reject:
smc_llc_cli_add_link_reject(qentry);
out:
+ kfree(ini);
kfree(qentry);
return rc;
}
+static void smc_llc_send_request_add_link(struct smc_link *link)
+{
+ struct smc_llc_msg_req_add_link_v2 *llc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_v2_buf *wr_buf;
+ struct smc_gidlist gidlist;
+ int rc, len, i;
+
+ if (!smc_wr_tx_link_hold(link))
+ return;
+ if (link->lgr->type == SMC_LGR_SYMMETRIC ||
+ link->lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ goto put_out;
+
+ smc_fill_gid_list(link->lgr, &gidlist, link->smcibdev, link->gid);
+ if (gidlist.len <= 1)
+ goto put_out;
+
+ rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
+ if (rc)
+ goto put_out;
+ llc = (struct smc_llc_msg_req_add_link_v2 *)wr_buf;
+ memset(llc, 0, SMC_WR_TX_SIZE);
+
+ llc->hd.common.llc_type = SMC_LLC_REQ_ADD_LINK;
+ for (i = 0; i < gidlist.len; i++)
+ memcpy(llc->gid[i], gidlist.list[i], sizeof(gidlist.list[0]));
+ llc->gid_cnt = gidlist.len;
+ len = sizeof(*llc) + (gidlist.len * sizeof(gidlist.list[0]));
+ smc_llc_init_msg_hdr(&llc->hd, link->lgr, len);
+ rc = smc_wr_tx_v2_send(link, pend, len);
+ if (!rc)
+ /* set REQ_ADD_LINK flow and wait for response from peer */
+ link->lgr->llc_flow_lcl.type = SMC_LLC_FLOW_REQ_ADD_LINK;
+put_out:
+ smc_wr_tx_link_put(link);
+}
+
/* as an SMC client, invite server to start the add_link processing */
static void smc_llc_cli_add_link_invite(struct smc_link *link,
struct smc_llc_qentry *qentry)
{
struct smc_link_group *lgr = smc_get_lgr(link);
- struct smc_init_info ini;
+ struct smc_init_info *ini = NULL;
+
+ if (lgr->smc_version == SMC_V2) {
+ smc_llc_send_request_add_link(link);
+ goto out;
+ }
if (lgr->type == SMC_LGR_SYMMETRIC ||
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
goto out;
- ini.vlan_id = lgr->vlan_id;
- smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
- if (!ini.ib_dev)
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
+ if (!ini)
goto out;
- smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1],
- ini.ib_gid, NULL, SMC_LLC_REQ);
+ ini->vlan_id = lgr->vlan_id;
+ smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
+ if (!ini->ib_dev)
+ goto out;
+
+ smc_llc_send_add_link(link, ini->ib_dev->mac[ini->ib_port - 1],
+ ini->ib_gid, NULL, SMC_LLC_REQ);
out:
+ kfree(ini);
kfree(qentry);
}
@@ -931,7 +1183,7 @@ static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc)
static bool smc_llc_is_local_add_link(union smc_llc_msg *llc)
{
- if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK &&
+ if (llc->raw.hdr.common.llc_type == SMC_LLC_ADD_LINK &&
smc_llc_is_empty_llc_message(llc))
return true;
return false;
@@ -1098,7 +1350,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
/* receive CONFIRM LINK response over the RoCE fabric */
qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0);
if (!qentry ||
- qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
+ qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
/* send DELETE LINK */
smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
false, SMC_LLC_DEL_LOST_PATH);
@@ -1117,37 +1369,80 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
return 0;
}
-int smc_llc_srv_add_link(struct smc_link *link)
+static void smc_llc_send_req_add_link_response(struct smc_llc_qentry *qentry)
+{
+ qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
+ sizeof(qentry->msg));
+ memset(&qentry->msg.raw.data, 0, sizeof(qentry->msg.raw.data));
+ smc_llc_send_message(qentry->link, &qentry->msg);
+}
+
+int smc_llc_srv_add_link(struct smc_link *link,
+ struct smc_llc_qentry *req_qentry)
{
enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
struct smc_link_group *lgr = link->lgr;
struct smc_llc_msg_add_link *add_llc;
struct smc_llc_qentry *qentry = NULL;
- struct smc_link *link_new;
- struct smc_init_info ini;
+ bool send_req_add_link_resp = false;
+ struct smc_link *link_new = NULL;
+ struct smc_init_info *ini = NULL;
int lnk_idx, rc = 0;
+ if (req_qentry &&
+ req_qentry->msg.raw.hdr.common.llc_type == SMC_LLC_REQ_ADD_LINK)
+ send_req_add_link_resp = true;
+
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
+ if (!ini) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
/* ignore client add link recommendation, start new flow */
- ini.vlan_id = lgr->vlan_id;
- smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
- if (!ini.ib_dev) {
+ ini->vlan_id = lgr->vlan_id;
+ if (lgr->smc_version == SMC_V2) {
+ ini->check_smcrv2 = true;
+ ini->smcrv2.saddr = lgr->saddr;
+ if (send_req_add_link_resp) {
+ struct smc_llc_msg_req_add_link_v2 *req_add =
+ &req_qentry->msg.req_add_link;
+
+ ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
+ }
+ }
+ smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
+ if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
+ lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
+ ini->smcrv2.ib_dev_v2 = link->smcibdev;
+ ini->smcrv2.ib_port_v2 = link->ibport;
+ } else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
- ini.ib_dev = link->smcibdev;
- ini.ib_port = link->ibport;
+ ini->ib_dev = link->smcibdev;
+ ini->ib_port = link->ibport;
}
lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
- if (lnk_idx < 0)
- return 0;
+ if (lnk_idx < 0) {
+ rc = 0;
+ goto out;
+ }
- rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini);
+ rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini);
if (rc)
- return rc;
+ goto out;
link_new = &lgr->lnk[lnk_idx];
+
+ rc = smcr_buf_map_lgr(link_new);
+ if (rc)
+ goto out_err;
+
rc = smc_llc_send_add_link(link,
- link_new->smcibdev->mac[ini.ib_port - 1],
+ link_new->smcibdev->mac[link_new->ibport-1],
link_new->gid, link_new, SMC_LLC_REQ);
if (rc)
goto out_err;
+ send_req_add_link_resp = false;
/* receive ADD LINK response over the RoCE fabric */
qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
if (!qentry) {
@@ -1162,7 +1457,8 @@ int smc_llc_srv_add_link(struct smc_link *link)
}
if (lgr->type == SMC_LGR_SINGLE &&
(!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
- !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) {
+ (lgr->smc_version == SMC_V2 ||
+ !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN)))) {
lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
}
smc_llc_save_add_link_info(link_new, add_llc);
@@ -1171,39 +1467,49 @@ int smc_llc_srv_add_link(struct smc_link *link)
rc = smc_ib_ready_link(link_new);
if (rc)
goto out_err;
- rc = smcr_buf_map_lgr(link_new);
- if (rc)
- goto out_err;
rc = smcr_buf_reg_lgr(link_new);
if (rc)
goto out_err;
- rc = smc_llc_srv_rkey_exchange(link, link_new);
- if (rc)
- goto out_err;
+ if (lgr->smc_version == SMC_V2) {
+ smc_llc_save_add_link_rkeys(link, link_new);
+ } else {
+ rc = smc_llc_srv_rkey_exchange(link, link_new);
+ if (rc)
+ goto out_err;
+ }
rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
if (rc)
goto out_err;
+ kfree(ini);
return 0;
out_err:
- link_new->state = SMC_LNK_INACTIVE;
- smcr_link_clear(link_new, false);
+ if (link_new) {
+ link_new->state = SMC_LNK_INACTIVE;
+ smcr_link_clear(link_new, false);
+ }
+out:
+ kfree(ini);
+ if (send_req_add_link_resp)
+ smc_llc_send_req_add_link_response(req_qentry);
return rc;
}
static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
{
struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
+ struct smc_llc_qentry *qentry;
int rc;
- smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
mutex_lock(&lgr->llc_conf_mutex);
- rc = smc_llc_srv_add_link(link);
+ rc = smc_llc_srv_add_link(link, qentry);
if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
/* delete any asymmetric link */
smc_llc_delete_asym_link(lgr);
}
mutex_unlock(&lgr->llc_conf_mutex);
+ kfree(qentry);
}
/* enqueue a local add_link req to trigger a new add_link flow */
@@ -1211,8 +1517,8 @@ void smc_llc_add_link_local(struct smc_link *link)
{
struct smc_llc_msg_add_link add_llc = {};
- add_llc.hd.length = sizeof(add_llc);
- add_llc.hd.common.type = SMC_LLC_ADD_LINK;
+ add_llc.hd.common.llc_type = SMC_LLC_ADD_LINK;
+ smc_llc_init_msg_hdr(&add_llc.hd, link->lgr, sizeof(add_llc));
/* no dev and port needed */
smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
}
@@ -1234,7 +1540,8 @@ static void smc_llc_add_link_work(struct work_struct *work)
else
smc_llc_process_srv_add_link(lgr);
out:
- smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_REQ_ADD_LINK)
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
/* enqueue a local del_link msg to trigger a new del_link flow,
@@ -1244,8 +1551,8 @@ void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{
struct smc_llc_msg_del_link del_llc = {};
- del_llc.hd.length = sizeof(del_llc);
- del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
+ del_llc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
+ smc_llc_init_msg_hdr(&del_llc.hd, link->lgr, sizeof(del_llc));
del_llc.link_num = del_link_id;
del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
@@ -1315,8 +1622,8 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
struct smc_llc_msg_del_link delllc = {};
int i;
- delllc.hd.common.type = SMC_LLC_DELETE_LINK;
- delllc.hd.length = sizeof(delllc);
+ delllc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
+ smc_llc_init_msg_hdr(&delllc.hd, lgr, sizeof(delllc));
if (ord)
delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
@@ -1432,6 +1739,8 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
link = qentry->link;
num_entries = llc->rtoken[0].num_rkeys;
+ if (num_entries > SMC_LLC_RKEYS_PER_MSG)
+ goto out_err;
/* first rkey entry is for receiving link */
rk_idx = smc_rtoken_add(link,
llc->rtoken[0].rmb_vaddr,
@@ -1450,6 +1759,7 @@ out_err:
llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
out:
llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
smc_llc_send_message(link, &qentry->msg);
smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}
@@ -1467,6 +1777,28 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
llc = &qentry->msg.delete_rkey;
link = qentry->link;
+ if (lgr->smc_version == SMC_V2) {
+ struct smc_llc_msg_delete_rkey_v2 *llcv2;
+
+ memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc));
+ llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2;
+ llcv2->num_inval_rkeys = 0;
+
+ max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(link, llcv2->rkey[i]))
+ llcv2->num_inval_rkeys++;
+ }
+ memset(&llc->rkey[0], 0, sizeof(llc->rkey));
+ memset(&llc->reserved2, 0, sizeof(llc->reserved2));
+ smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
+ if (llcv2->num_inval_rkeys) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = llcv2->num_inval_rkeys;
+ }
+ goto finish;
+ }
+
max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
for (i = 0; i < max; i++) {
if (smc_rtoken_delete(link, llc->rkey[i]))
@@ -1476,6 +1808,7 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
llc->err_mask = err_mask;
}
+finish:
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, &qentry->msg);
smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
@@ -1511,7 +1844,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
if (!smc_link_usable(link))
goto out;
- switch (llc->raw.hdr.common.type) {
+ switch (llc->raw.hdr.common.llc_type) {
case SMC_LLC_TEST_LINK:
llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc);
@@ -1536,8 +1869,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
qentry);
wake_up(&lgr->llc_msg_waiter);
- } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
- qentry)) {
+ return;
+ }
+ if (lgr->llc_flow_lcl.type ==
+ SMC_LLC_FLOW_REQ_ADD_LINK) {
+ /* server started add_link processing */
+ lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+ qentry);
+ schedule_work(&lgr->llc_add_link_work);
+ return;
+ }
+ if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
schedule_work(&lgr->llc_add_link_work);
}
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
@@ -1585,6 +1928,23 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
}
return;
+ case SMC_LLC_REQ_ADD_LINK:
+ /* handle response here, smc_llc_flow_stop() cannot be called
+ * in tasklet context
+ */
+ if (lgr->role == SMC_CLNT &&
+ lgr->llc_flow_lcl.type == SMC_LLC_FLOW_REQ_ADD_LINK &&
+ (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP)) {
+ smc_llc_flow_stop(link->lgr, &lgr->llc_flow_lcl);
+ } else if (lgr->role == SMC_SERV) {
+ if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
+ /* as smc server, handle client suggestion */
+ lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
+ schedule_work(&lgr->llc_add_link_work);
+ }
+ return;
+ }
+ break;
default:
smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
break;
@@ -1628,7 +1988,7 @@ static void smc_llc_rx_response(struct smc_link *link,
{
enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type;
struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl;
- u8 llc_type = qentry->msg.raw.hdr.common.type;
+ u8 llc_type = qentry->msg.raw.hdr.common.llc_type;
switch (llc_type) {
case SMC_LLC_TEST_LINK:
@@ -1654,7 +2014,8 @@ static void smc_llc_rx_response(struct smc_link *link,
/* not used because max links is 3 */
break;
default:
- smc_llc_protocol_violation(link->lgr, llc_type);
+ smc_llc_protocol_violation(link->lgr,
+ qentry->msg.raw.hdr.common.type);
break;
}
kfree(qentry);
@@ -1679,7 +2040,8 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
/* process responses immediately */
- if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
+ if ((llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) &&
+ llc->raw.hdr.common.llc_type != SMC_LLC_REQ_ADD_LINK) {
smc_llc_rx_response(link, qentry);
return;
}
@@ -1699,8 +2061,13 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
if (wc->byte_len < sizeof(*llc))
return; /* short message */
- if (llc->raw.hdr.length != sizeof(*llc))
- return; /* invalid message */
+ if (!llc->raw.hdr.common.llc_version) {
+ if (llc->raw.hdr.length != sizeof(*llc))
+ return; /* invalid message */
+ } else {
+ if (llc->raw.hdr.length_v2 < sizeof(*llc))
+ return; /* invalid message */
+ }
smc_llc_enqueue(link, llc);
}
@@ -1787,7 +2154,7 @@ void smc_llc_link_active(struct smc_link *link)
link->smcibdev->ibdev->name, link->ibport);
link->state = SMC_LNK_ACTIVE;
if (link->lgr->llc_testlink_time) {
- link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
+ link->llc_testlink_time = link->lgr->llc_testlink_time;
schedule_delayed_work(&link->llc_testlink_wrk,
link->llc_testlink_time);
}
@@ -1919,6 +2286,35 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
.handler = smc_llc_rx_handler,
.type = SMC_LLC_DELETE_RKEY
},
+ /* V2 types */
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_TEST_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_REQ_ADD_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_RKEY_V2
+ },
{
.handler = NULL,
}
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index cc00a2ec4e92..4404e52b3346 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -30,10 +30,19 @@ enum smc_llc_msg_type {
SMC_LLC_ADD_LINK = 0x02,
SMC_LLC_ADD_LINK_CONT = 0x03,
SMC_LLC_DELETE_LINK = 0x04,
+ SMC_LLC_REQ_ADD_LINK = 0x05,
SMC_LLC_CONFIRM_RKEY = 0x06,
SMC_LLC_TEST_LINK = 0x07,
SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
SMC_LLC_DELETE_RKEY = 0x09,
+ /* V2 types */
+ SMC_LLC_CONFIRM_LINK_V2 = 0x21,
+ SMC_LLC_ADD_LINK_V2 = 0x22,
+ SMC_LLC_DELETE_LINK_V2 = 0x24,
+ SMC_LLC_REQ_ADD_LINK_V2 = 0x25,
+ SMC_LLC_CONFIRM_RKEY_V2 = 0x26,
+ SMC_LLC_TEST_LINK_V2 = 0x27,
+ SMC_LLC_DELETE_RKEY_V2 = 0x29,
};
#define smc_link_downing(state) \
@@ -102,7 +111,8 @@ void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord,
u32 rsn);
int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
-int smc_llc_srv_add_link(struct smc_link *link);
+int smc_llc_srv_add_link(struct smc_link *link,
+ struct smc_llc_qentry *req_qentry);
void smc_llc_add_link_local(struct smc_link *link);
int smc_llc_init(void) __init;
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index 6fb6f96c1d17..f13ab0661ed5 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -19,11 +19,19 @@
#include "smc_core.h"
#include "smc_ism.h"
#include "smc_ib.h"
+#include "smc_clc.h"
#include "smc_stats.h"
#include "smc_netlink.h"
-#define SMC_CMD_MAX_ATTR 1
+const struct nla_policy
+smc_gen_ueid_policy[SMC_NLA_EID_TABLE_MAX + 1] = {
+ [SMC_NLA_EID_TABLE_UNSPEC] = { .type = NLA_UNSPEC },
+ [SMC_NLA_EID_TABLE_ENTRY] = { .type = NLA_STRING,
+ .len = SMC_MAX_EID_LEN,
+ },
+};
+#define SMC_CMD_MAX_ATTR 1
/* SMC_GENL generic netlink operation definition */
static const struct genl_ops smc_gen_nl_ops[] = {
{
@@ -66,6 +74,43 @@ static const struct genl_ops smc_gen_nl_ops[] = {
/* can be retrieved by unprivileged users */
.dumpit = smc_nl_get_fback_stats,
},
+ {
+ .cmd = SMC_NETLINK_DUMP_UEID,
+ /* can be retrieved by unprivileged users */
+ .dumpit = smc_nl_dump_ueid,
+ },
+ {
+ .cmd = SMC_NETLINK_ADD_UEID,
+ .flags = GENL_ADMIN_PERM,
+ .doit = smc_nl_add_ueid,
+ .policy = smc_gen_ueid_policy,
+ },
+ {
+ .cmd = SMC_NETLINK_REMOVE_UEID,
+ .flags = GENL_ADMIN_PERM,
+ .doit = smc_nl_remove_ueid,
+ .policy = smc_gen_ueid_policy,
+ },
+ {
+ .cmd = SMC_NETLINK_FLUSH_UEID,
+ .flags = GENL_ADMIN_PERM,
+ .doit = smc_nl_flush_ueid,
+ },
+ {
+ .cmd = SMC_NETLINK_DUMP_SEID,
+ /* can be retrieved by unprivileged users */
+ .dumpit = smc_nl_dump_seid,
+ },
+ {
+ .cmd = SMC_NETLINK_ENABLE_SEID,
+ .flags = GENL_ADMIN_PERM,
+ .doit = smc_nl_enable_seid,
+ },
+ {
+ .cmd = SMC_NETLINK_DISABLE_SEID,
+ .flags = GENL_ADMIN_PERM,
+ .doit = smc_nl_disable_seid,
+ },
};
static const struct nla_policy smc_gen_nl_policy[2] = {
diff --git a/net/smc/smc_netlink.h b/net/smc/smc_netlink.h
index 5ce2c0a89ccd..e8c6c3f0e98c 100644
--- a/net/smc/smc_netlink.h
+++ b/net/smc/smc_netlink.h
@@ -17,6 +17,8 @@
extern struct genl_family smc_gen_nl_family;
+extern const struct nla_policy smc_gen_ueid_policy[];
+
struct smc_nl_dmp_ctx {
int pos[3];
};
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 4a964e9190b0..67e9d9fde085 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -953,6 +953,26 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
return rc;
}
+static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i,
+ struct smc_init_info *ini)
+{
+ if (!ini->check_smcrv2 &&
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL,
+ NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
+ return 0;
+ }
+ if (ini->check_smcrv2 &&
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2,
+ NULL, &ini->smcrv2)) {
+ ini->smcrv2.ib_dev_v2 = ibdev;
+ ini->smcrv2.ib_port_v2 = i;
+ return 0;
+ }
+ return -ENODEV;
+}
+
/* find a roce device for the given pnetid */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_init_info *ini,
@@ -961,7 +981,6 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_ib_device *ibdev;
int i;
- ini->ib_dev = NULL;
mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (ibdev == known_dev)
@@ -971,12 +990,9 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
smc_ib_port_active(ibdev, i) &&
- !test_bit(i - 1, ibdev->ports_going_away) &&
- !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
- ini->ib_gid, NULL)) {
- ini->ib_dev = ibdev;
- ini->ib_port = i;
- goto out;
+ !test_bit(i - 1, ibdev->ports_going_away)) {
+ if (!smc_pnet_determine_gid(ibdev, i, ini))
+ goto out;
}
}
}
@@ -1016,12 +1032,9 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
- !test_bit(i - 1, ibdev->ports_going_away) &&
- !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
- ini->ib_gid, NULL)) {
- ini->ib_dev = ibdev;
- ini->ib_port = i;
- break;
+ !test_bit(i - 1, ibdev->ports_going_away)) {
+ if (!smc_pnet_determine_gid(ibdev, i, ini))
+ break;
}
}
}
@@ -1083,8 +1096,6 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- ini->ib_dev = NULL;
- ini->ib_port = 0;
if (!dst)
goto out;
if (!dst->dev)
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 170b733bc736..51e8eb2933ff 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -22,6 +22,7 @@
#include "smc_tx.h" /* smc_tx_consumer_update() */
#include "smc_rx.h"
#include "smc_stats.h"
+#include "smc_tracepoint.h"
/* callback implementation to wakeup consumers blocked with smc_rx_wait().
* indirectly called by smc_cdc_msg_recv_action().
@@ -438,6 +439,8 @@ copy:
if (msg && smc_rx_update_consumer(smc, cons, copylen))
goto out;
}
+
+ trace_smc_rx_recvmsg(smc, copylen);
} while (read_remaining);
out:
return read_done;
diff --git a/net/smc/smc_tracepoint.c b/net/smc/smc_tracepoint.c
new file mode 100644
index 000000000000..8d47ced5a492
--- /dev/null
+++ b/net/smc/smc_tracepoint.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define CREATE_TRACE_POINTS
+#include "smc_tracepoint.h"
+
+EXPORT_TRACEPOINT_SYMBOL(smc_switch_to_fallback);
+EXPORT_TRACEPOINT_SYMBOL(smc_tx_sendmsg);
+EXPORT_TRACEPOINT_SYMBOL(smc_rx_recvmsg);
+EXPORT_TRACEPOINT_SYMBOL(smcr_link_down);
diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
new file mode 100644
index 000000000000..ec17f29646f5
--- /dev/null
+++ b/net/smc/smc_tracepoint.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM smc
+
+#if !defined(_TRACE_SMC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SMC_H
+
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/tracepoint.h>
+#include <net/ipv6.h>
+#include "smc.h"
+#include "smc_core.h"
+
+TRACE_EVENT(smc_switch_to_fallback,
+
+ TP_PROTO(const struct smc_sock *smc, int fallback_rsn),
+
+ TP_ARGS(smc, fallback_rsn),
+
+ TP_STRUCT__entry(
+ __field(const void *, sk)
+ __field(const void *, clcsk)
+ __field(int, fallback_rsn)
+ ),
+
+ TP_fast_assign(
+ const struct sock *sk = &smc->sk;
+ const struct sock *clcsk = smc->clcsock->sk;
+
+ __entry->sk = sk;
+ __entry->clcsk = clcsk;
+ __entry->fallback_rsn = fallback_rsn;
+ ),
+
+ TP_printk("sk=%p clcsk=%p fallback_rsn=%d",
+ __entry->sk, __entry->clcsk, __entry->fallback_rsn)
+);
+
+DECLARE_EVENT_CLASS(smc_msg_event,
+
+ TP_PROTO(const struct smc_sock *smc, size_t len),
+
+ TP_ARGS(smc, len),
+
+ TP_STRUCT__entry(
+ __field(const void *, smc)
+ __field(size_t, len)
+ __string(name, smc->conn.lnk->ibname)
+ ),
+
+ TP_fast_assign(
+ __entry->smc = smc;
+ __entry->len = len;
+ __assign_str(name, smc->conn.lnk->ibname);
+ ),
+
+ TP_printk("smc=%p len=%zu dev=%s",
+ __entry->smc, __entry->len,
+ __get_str(name))
+);
+
+DEFINE_EVENT(smc_msg_event, smc_tx_sendmsg,
+
+ TP_PROTO(const struct smc_sock *smc, size_t len),
+
+ TP_ARGS(smc, len)
+);
+
+DEFINE_EVENT(smc_msg_event, smc_rx_recvmsg,
+
+ TP_PROTO(const struct smc_sock *smc, size_t len),
+
+ TP_ARGS(smc, len)
+);
+
+TRACE_EVENT(smcr_link_down,
+
+ TP_PROTO(const struct smc_link *lnk, void *location),
+
+ TP_ARGS(lnk, location),
+
+ TP_STRUCT__entry(
+ __field(const void *, lnk)
+ __field(const void *, lgr)
+ __field(int, state)
+ __string(name, lnk->ibname)
+ __field(void *, location)
+ ),
+
+ TP_fast_assign(
+ const struct smc_link_group *lgr = lnk->lgr;
+
+ __entry->lnk = lnk;
+ __entry->lgr = lgr;
+ __entry->state = lnk->state;
+ __assign_str(name, lnk->ibname);
+ __entry->location = location;
+ ),
+
+ TP_printk("lnk=%p lgr=%p state=%d dev=%s location=%pS",
+ __entry->lnk, __entry->lgr,
+ __entry->state, __get_str(name),
+ __entry->location)
+);
+
+#endif /* _TRACE_SMC_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE smc_tracepoint
+
+#include <trace/define_trace.h>
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index c79361dfcdfb..be241d53020f 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -28,6 +28,7 @@
#include "smc_ism.h"
#include "smc_tx.h"
#include "smc_stats.h"
+#include "smc_tracepoint.h"
#define SMC_TX_WORK_DELAY 0
#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
@@ -245,6 +246,8 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
SMC_TX_CORK_DELAY);
else
smc_tx_sndbuf_nonempty(conn);
+
+ trace_smc_tx_sendmsg(smc, copylen);
} /* while (msg_data_left(msg)) */
return send_done;
@@ -496,7 +499,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
@@ -505,8 +508,11 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
+ if (!link || !smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
if (rc < 0) {
+ smc_wr_tx_link_put(link);
if (rc == -EBUSY) {
struct smc_sock *smc =
container_of(conn, struct smc_sock, conn);
@@ -547,22 +553,7 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
out_unlock:
spin_unlock_bh(&conn->send_lock);
- return rc;
-}
-
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
-{
- struct smc_link *link = conn->lnk;
- int rc = -ENOLINK;
-
- if (!link)
- return rc;
-
- atomic_inc(&link->wr_tx_refcnt);
- if (smc_link_usable(link))
- rc = _smcr_tx_sndbuf_nonempty(conn);
- if (atomic_dec_and_test(&link->wr_tx_refcnt))
- wake_up_all(&link->wr_tx_wait);
+ smc_wr_tx_link_put(link);
return rc;
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index a419e9af36b9..600ab5889227 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -101,19 +101,32 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
}
pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
- if (pnd_snd_idx == link->wr_tx_cnt)
- return;
- link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
- if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
- complete(&link->wr_tx_compl[pnd_snd_idx]);
- memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
- /* clear the full struct smc_wr_tx_pend including .priv */
- memset(&link->wr_tx_pends[pnd_snd_idx], 0,
- sizeof(link->wr_tx_pends[pnd_snd_idx]));
- memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
- sizeof(link->wr_tx_bufs[pnd_snd_idx]));
- if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
- return;
+ if (pnd_snd_idx == link->wr_tx_cnt) {
+ if (link->lgr->smc_version != SMC_V2 ||
+ link->wr_tx_v2_pend->wr_id != wc->wr_id)
+ return;
+ link->wr_tx_v2_pend->wc_status = wc->status;
+ memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd));
+ /* clear the full struct smc_wr_tx_pend including .priv */
+ memset(link->wr_tx_v2_pend, 0,
+ sizeof(*link->wr_tx_v2_pend));
+ memset(link->lgr->wr_tx_buf_v2, 0,
+ sizeof(*link->lgr->wr_tx_buf_v2));
+ } else {
+ link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
+ if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
+ complete(&link->wr_tx_compl[pnd_snd_idx]);
+ memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx],
+ sizeof(pnd_snd));
+ /* clear the full struct smc_wr_tx_pend including .priv */
+ memset(&link->wr_tx_pends[pnd_snd_idx], 0,
+ sizeof(link->wr_tx_pends[pnd_snd_idx]));
+ memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
+ sizeof(link->wr_tx_bufs[pnd_snd_idx]));
+ if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
+ return;
+ }
+
if (wc->status) {
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
/* clear full struct smc_wr_tx_pend including .priv */
@@ -123,6 +136,12 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
sizeof(link->wr_tx_bufs[i]));
clear_bit(i, link->wr_tx_mask);
}
+ if (link->lgr->smc_version == SMC_V2) {
+ memset(link->wr_tx_v2_pend, 0,
+ sizeof(*link->wr_tx_v2_pend));
+ memset(link->lgr->wr_tx_buf_v2, 0,
+ sizeof(*link->lgr->wr_tx_buf_v2));
+ }
/* terminate link */
smcr_link_down_cond_sched(link);
}
@@ -239,6 +258,33 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
return 0;
}
+int smc_wr_tx_get_v2_slot(struct smc_link *link,
+ smc_wr_tx_handler handler,
+ struct smc_wr_v2_buf **wr_buf,
+ struct smc_wr_tx_pend_priv **wr_pend_priv)
+{
+ struct smc_wr_tx_pend *wr_pend;
+ struct ib_send_wr *wr_ib;
+ u64 wr_id;
+
+ if (link->wr_tx_v2_pend->idx == link->wr_tx_cnt)
+ return -EBUSY;
+
+ *wr_buf = NULL;
+ *wr_pend_priv = NULL;
+ wr_id = smc_wr_tx_get_next_wr_id(link);
+ wr_pend = link->wr_tx_v2_pend;
+ wr_pend->wr_id = wr_id;
+ wr_pend->handler = handler;
+ wr_pend->link = link;
+ wr_pend->idx = link->wr_tx_cnt;
+ wr_ib = link->wr_tx_v2_ib;
+ wr_ib->wr_id = wr_id;
+ *wr_buf = link->lgr->wr_tx_buf_v2;
+ *wr_pend_priv = &wr_pend->priv;
+ return 0;
+}
+
int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv)
{
@@ -256,6 +302,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
test_and_clear_bit(idx, link->wr_tx_mask);
wake_up(&link->wr_tx_wait);
return 1;
+ } else if (link->lgr->smc_version == SMC_V2 &&
+ pend->idx == link->wr_tx_cnt) {
+ /* Large v2 buffer */
+ memset(&link->wr_tx_v2_pend, 0,
+ sizeof(link->wr_tx_v2_pend));
+ memset(&link->lgr->wr_tx_buf_v2, 0,
+ sizeof(link->lgr->wr_tx_buf_v2));
+ return 1;
}
return 0;
@@ -280,6 +334,22 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
return rc;
}
+int smc_wr_tx_v2_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+ int len)
+{
+ int rc;
+
+ link->wr_tx_v2_ib->sg_list[0].length = len;
+ ib_req_notify_cq(link->smcibdev->roce_cq_send,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ rc = ib_post_send(link->roce_qp, link->wr_tx_v2_ib, NULL);
+ if (rc) {
+ smc_wr_tx_put_slot(link, priv);
+ smcr_link_down_cond_sched(link);
+ }
+ return rc;
+}
+
/* Send prepared WR slot via ib_post_send and wait for send completion
* notification.
* @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
@@ -517,6 +587,7 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk)
static void smc_wr_init_sge(struct smc_link *lnk)
{
+ int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
u32 i;
for (i = 0; i < lnk->wr_tx_cnt; i++) {
@@ -545,14 +616,44 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
}
+
+ if (lnk->lgr->smc_version == SMC_V2) {
+ lnk->wr_tx_v2_sge->addr = lnk->wr_tx_v2_dma_addr;
+ lnk->wr_tx_v2_sge->length = SMC_WR_BUF_V2_SIZE;
+ lnk->wr_tx_v2_sge->lkey = lnk->roce_pd->local_dma_lkey;
+
+ lnk->wr_tx_v2_ib->next = NULL;
+ lnk->wr_tx_v2_ib->sg_list = lnk->wr_tx_v2_sge;
+ lnk->wr_tx_v2_ib->num_sge = 1;
+ lnk->wr_tx_v2_ib->opcode = IB_WR_SEND;
+ lnk->wr_tx_v2_ib->send_flags =
+ IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ }
+
+ /* With SMC-Rv2 there can be messages larger than SMC_WR_TX_SIZE.
+ * Each ib_recv_wr gets 2 sges, the second one is a spillover buffer
+ * and the same buffer for all sges. When a larger message arrived then
+ * the content of the first small sge is copied to the beginning of
+ * the larger spillover buffer, allowing easy data mapping.
+ */
for (i = 0; i < lnk->wr_rx_cnt; i++) {
- lnk->wr_rx_sges[i].addr =
+ int x = i * sges_per_buf;
+
+ lnk->wr_rx_sges[x].addr =
lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
- lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
- lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
+ lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE;
+ lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey;
+ if (lnk->lgr->smc_version == SMC_V2) {
+ lnk->wr_rx_sges[x + 1].addr =
+ lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE;
+ lnk->wr_rx_sges[x + 1].length =
+ SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE;
+ lnk->wr_rx_sges[x + 1].lkey =
+ lnk->roce_pd->local_dma_lkey;
+ }
lnk->wr_rx_ibs[i].next = NULL;
- lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
- lnk->wr_rx_ibs[i].num_sge = 1;
+ lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x];
+ lnk->wr_rx_ibs[i].num_sge = sges_per_buf;
}
lnk->wr_reg.wr.next = NULL;
lnk->wr_reg.wr.num_sge = 0;
@@ -585,16 +686,45 @@ void smc_wr_free_link(struct smc_link *lnk)
DMA_FROM_DEVICE);
lnk->wr_rx_dma_addr = 0;
}
+ if (lnk->wr_rx_v2_dma_addr) {
+ ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
+ SMC_WR_BUF_V2_SIZE,
+ DMA_FROM_DEVICE);
+ lnk->wr_rx_v2_dma_addr = 0;
+ }
if (lnk->wr_tx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
DMA_TO_DEVICE);
lnk->wr_tx_dma_addr = 0;
}
+ if (lnk->wr_tx_v2_dma_addr) {
+ ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
+ SMC_WR_BUF_V2_SIZE,
+ DMA_TO_DEVICE);
+ lnk->wr_tx_v2_dma_addr = 0;
+ }
+}
+
+void smc_wr_free_lgr_mem(struct smc_link_group *lgr)
+{
+ if (lgr->smc_version < SMC_V2)
+ return;
+
+ kfree(lgr->wr_rx_buf_v2);
+ lgr->wr_rx_buf_v2 = NULL;
+ kfree(lgr->wr_tx_buf_v2);
+ lgr->wr_tx_buf_v2 = NULL;
}
void smc_wr_free_link_mem(struct smc_link *lnk)
{
+ kfree(lnk->wr_tx_v2_ib);
+ lnk->wr_tx_v2_ib = NULL;
+ kfree(lnk->wr_tx_v2_sge);
+ lnk->wr_tx_v2_sge = NULL;
+ kfree(lnk->wr_tx_v2_pend);
+ lnk->wr_tx_v2_pend = NULL;
kfree(lnk->wr_tx_compl);
lnk->wr_tx_compl = NULL;
kfree(lnk->wr_tx_pends);
@@ -619,8 +749,26 @@ void smc_wr_free_link_mem(struct smc_link *lnk)
lnk->wr_rx_bufs = NULL;
}
+int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr)
+{
+ if (lgr->smc_version < SMC_V2)
+ return 0;
+
+ lgr->wr_rx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
+ if (!lgr->wr_rx_buf_v2)
+ return -ENOMEM;
+ lgr->wr_tx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
+ if (!lgr->wr_tx_buf_v2) {
+ kfree(lgr->wr_rx_buf_v2);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
int smc_wr_alloc_link_mem(struct smc_link *link)
{
+ int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1;
+
/* allocate link related memory */
link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
if (!link->wr_tx_bufs)
@@ -653,7 +801,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
if (!link->wr_tx_sges)
goto no_mem_wr_tx_rdma_sges;
link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
- sizeof(link->wr_rx_sges[0]),
+ sizeof(link->wr_rx_sges[0]) * sges_per_buf,
GFP_KERNEL);
if (!link->wr_rx_sges)
goto no_mem_wr_tx_sges;
@@ -672,8 +820,29 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL);
if (!link->wr_tx_compl)
goto no_mem_wr_tx_pends;
+
+ if (link->lgr->smc_version == SMC_V2) {
+ link->wr_tx_v2_ib = kzalloc(sizeof(*link->wr_tx_v2_ib),
+ GFP_KERNEL);
+ if (!link->wr_tx_v2_ib)
+ goto no_mem_tx_compl;
+ link->wr_tx_v2_sge = kzalloc(sizeof(*link->wr_tx_v2_sge),
+ GFP_KERNEL);
+ if (!link->wr_tx_v2_sge)
+ goto no_mem_v2_ib;
+ link->wr_tx_v2_pend = kzalloc(sizeof(*link->wr_tx_v2_pend),
+ GFP_KERNEL);
+ if (!link->wr_tx_v2_pend)
+ goto no_mem_v2_sge;
+ }
return 0;
+no_mem_v2_sge:
+ kfree(link->wr_tx_v2_sge);
+no_mem_v2_ib:
+ kfree(link->wr_tx_v2_ib);
+no_mem_tx_compl:
+ kfree(link->wr_tx_compl);
no_mem_wr_tx_pends:
kfree(link->wr_tx_pends);
no_mem_wr_tx_mask:
@@ -725,6 +894,24 @@ int smc_wr_create_link(struct smc_link *lnk)
rc = -EIO;
goto out;
}
+ if (lnk->lgr->smc_version == SMC_V2) {
+ lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev,
+ lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) {
+ lnk->wr_rx_v2_dma_addr = 0;
+ rc = -EIO;
+ goto dma_unmap;
+ }
+ lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev,
+ lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ibdev, lnk->wr_tx_v2_dma_addr)) {
+ lnk->wr_tx_v2_dma_addr = 0;
+ rc = -EIO;
+ goto dma_unmap;
+ }
+ }
lnk->wr_tx_dma_addr = ib_dma_map_single(
ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
DMA_TO_DEVICE);
@@ -742,6 +929,18 @@ int smc_wr_create_link(struct smc_link *lnk)
return rc;
dma_unmap:
+ if (lnk->wr_rx_v2_dma_addr) {
+ ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
+ SMC_WR_BUF_V2_SIZE,
+ DMA_FROM_DEVICE);
+ lnk->wr_rx_v2_dma_addr = 0;
+ }
+ if (lnk->wr_tx_v2_dma_addr) {
+ ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
+ SMC_WR_BUF_V2_SIZE,
+ DMA_TO_DEVICE);
+ lnk->wr_tx_v2_dma_addr = 0;
+ }
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
DMA_FROM_DEVICE);
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 423b8709f1c9..f353311e6f84 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -60,6 +60,20 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
atomic_long_set(wr_tx_id, val);
}
+static inline bool smc_wr_tx_link_hold(struct smc_link *link)
+{
+ if (!smc_link_usable(link))
+ return false;
+ atomic_inc(&link->wr_tx_refcnt);
+ return true;
+}
+
+static inline void smc_wr_tx_link_put(struct smc_link *link)
+{
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+}
+
static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk)
{
wake_up_all(&lnk->wr_tx_wait);
@@ -87,8 +101,10 @@ static inline int smc_wr_rx_post(struct smc_link *link)
int smc_wr_create_link(struct smc_link *lnk);
int smc_wr_alloc_link_mem(struct smc_link *lnk);
+int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr);
void smc_wr_free_link(struct smc_link *lnk);
void smc_wr_free_link_mem(struct smc_link *lnk);
+void smc_wr_free_lgr_mem(struct smc_link_group *lgr);
void smc_wr_remember_qp_attr(struct smc_link *lnk);
void smc_wr_remove_dev(struct smc_ib_device *smcibdev);
void smc_wr_add_dev(struct smc_ib_device *smcibdev);
@@ -97,10 +113,16 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wrs,
struct smc_wr_tx_pend_priv **wr_pend_priv);
+int smc_wr_tx_get_v2_slot(struct smc_link *link,
+ smc_wr_tx_handler handler,
+ struct smc_wr_v2_buf **wr_buf,
+ struct smc_wr_tx_pend_priv **wr_pend_priv);
int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
+int smc_wr_tx_v2_send(struct smc_link *link,
+ struct smc_wr_tx_pend_priv *priv, int len);
int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 9c0343568d2a..1a72c67afed5 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -27,18 +27,10 @@
static struct workqueue_struct *strp_wq;
-struct _strp_msg {
- /* Internal cb structure. struct strp_msg must be first for passing
- * to upper layer.
- */
- struct strp_msg strp;
- int accum_len;
-};
-
static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
{
return (struct _strp_msg *)((void *)skb->cb +
- offsetof(struct qdisc_skb_cb, data));
+ offsetof(struct sk_skb_cb, strp));
}
/* Lower lock held */
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 6e4dbd577a39..d435bffc6199 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -162,8 +162,10 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
const size_t buflen, const char *delim,
struct sockaddr_in6 *sin6)
{
- char *p;
+ char p[IPV6_SCOPE_ID_LEN + 1];
size_t len;
+ u32 scope_id = 0;
+ struct net_device *dev;
if ((buf + buflen) == delim)
return 1;
@@ -175,29 +177,23 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
return 0;
len = (buf + buflen) - delim - 1;
- p = kmemdup_nul(delim + 1, len, GFP_KERNEL);
- if (p) {
- u32 scope_id = 0;
- struct net_device *dev;
-
- dev = dev_get_by_name(net, p);
- if (dev != NULL) {
- scope_id = dev->ifindex;
- dev_put(dev);
- } else {
- if (kstrtou32(p, 10, &scope_id) != 0) {
- kfree(p);
- return 0;
- }
- }
-
- kfree(p);
-
- sin6->sin6_scope_id = scope_id;
- return 1;
+ if (len > IPV6_SCOPE_ID_LEN)
+ return 0;
+
+ memcpy(p, delim + 1, len);
+ p[len] = 0;
+
+ dev = dev_get_by_name(net, p);
+ if (dev != NULL) {
+ scope_id = dev->ifindex;
+ dev_put(dev);
+ } else {
+ if (kstrtou32(p, 10, &scope_id) != 0)
+ return 0;
}
- return 0;
+ sin6->sin6_scope_id = scope_id;
+ return 1;
}
static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen,
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 3e776e3dff91..b87565b64928 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -645,7 +645,7 @@ static bool gss_check_seq_num(const struct svc_rqst *rqstp, struct rsc *rsci,
}
__set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win);
goto ok;
- } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) {
+ } else if (seq_num + GSS_SEQ_WIN <= sd->sd_max) {
goto toolow;
}
if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win))
@@ -781,7 +781,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS);
xdr_seq = kmalloc(4, GFP_KERNEL);
if (!xdr_seq)
- return -1;
+ return -ENOMEM;
*xdr_seq = htonl(seq);
iov.iov_base = xdr_seq;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f056ff931444..a312ea2bc440 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1076,24 +1076,21 @@ void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
static
void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
{
-
- if (clnt != NULL) {
- rpc_task_set_transport(task, clnt);
- task->tk_client = clnt;
- refcount_inc(&clnt->cl_count);
- if (clnt->cl_softrtry)
- task->tk_flags |= RPC_TASK_SOFT;
- if (clnt->cl_softerr)
- task->tk_flags |= RPC_TASK_TIMEOUT;
- if (clnt->cl_noretranstimeo)
- task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
- if (atomic_read(&clnt->cl_swapper))
- task->tk_flags |= RPC_TASK_SWAPPER;
- /* Add to the client's list of all tasks */
- spin_lock(&clnt->cl_lock);
- list_add_tail(&task->tk_task, &clnt->cl_tasks);
- spin_unlock(&clnt->cl_lock);
- }
+ rpc_task_set_transport(task, clnt);
+ task->tk_client = clnt;
+ refcount_inc(&clnt->cl_count);
+ if (clnt->cl_softrtry)
+ task->tk_flags |= RPC_TASK_SOFT;
+ if (clnt->cl_softerr)
+ task->tk_flags |= RPC_TASK_TIMEOUT;
+ if (clnt->cl_noretranstimeo)
+ task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
+ if (atomic_read(&clnt->cl_swapper))
+ task->tk_flags |= RPC_TASK_SWAPPER;
+ /* Add to the client's list of all tasks */
+ spin_lock(&clnt->cl_lock);
+ list_add_tail(&task->tk_task, &clnt->cl_tasks);
+ spin_unlock(&clnt->cl_lock);
}
static void
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c045f63d11fa..e2c835482791 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -277,9 +277,17 @@ static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
static void rpc_task_set_debuginfo(struct rpc_task *task)
{
- static atomic_t rpc_pid;
+ struct rpc_clnt *clnt = task->tk_client;
- task->tk_pid = atomic_inc_return(&rpc_pid);
+ /* Might be a task carrying a reverse-direction operation */
+ if (!clnt) {
+ static atomic_t rpc_pid;
+
+ task->tk_pid = atomic_inc_return(&rpc_pid);
+ return;
+ }
+
+ task->tk_pid = atomic_inc_return(&clnt->cl_pid);
}
#else
static inline void rpc_task_set_debuginfo(struct rpc_task *task)
@@ -829,6 +837,7 @@ void rpc_exit_task(struct rpc_task *task)
else if (task->tk_client)
rpc_count_iostats(task, task->tk_client->cl_metrics);
if (task->tk_ops->rpc_call_done != NULL) {
+ trace_rpc_task_call_done(task, task->tk_ops->rpc_call_done);
task->tk_ops->rpc_call_done(task, task->tk_calldata);
if (task->tk_action != NULL) {
/* Always release the RPC slot and buffer memory */
@@ -903,8 +912,10 @@ static void __rpc_execute(struct rpc_task *task)
/*
* Lockless check for whether task is sleeping or not.
*/
- if (!RPC_IS_QUEUED(task))
+ if (!RPC_IS_QUEUED(task)) {
+ cond_resched();
continue;
+ }
/*
* Signalled tasks should exit rather than sleep.
@@ -1230,8 +1241,7 @@ static int rpciod_start(void)
if (!wq)
goto out_failed;
rpciod_workqueue = wq;
- /* Note: highpri because network receive is latency sensitive */
- wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0);
+ wq = alloc_workqueue("xprtiod", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
if (!wq)
goto free_rpciod;
xprtiod_workqueue = wq;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a3bbe5ce4570..4292278a9552 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1186,45 +1186,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
#endif
-static int
-svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
-{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
- const struct svc_procedure *procp = rqstp->rq_procinfo;
-
- /*
- * Decode arguments
- * XXX: why do we ignore the return value?
- */
- if (procp->pc_decode &&
- !procp->pc_decode(rqstp, argv->iov_base)) {
- *statp = rpc_garbage_args;
- return 1;
- }
-
- *statp = procp->pc_func(rqstp);
-
- if (*statp == rpc_drop_reply ||
- test_bit(RQ_DROPME, &rqstp->rq_flags))
- return 0;
-
- if (rqstp->rq_auth_stat != rpc_auth_ok)
- return 1;
-
- if (*statp != rpc_success)
- return 1;
-
- /* Encode reply */
- if (procp->pc_encode &&
- !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
- dprintk("svc: failed to encode reply\n");
- /* serv->sv_stats->rpcsystemerr++; */
- *statp = rpc_system_err;
- }
- return 1;
-}
-
__be32
svc_generic_init_request(struct svc_rqst *rqstp,
const struct svc_program *progp,
@@ -1291,7 +1252,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
__be32 *statp;
u32 prog, vers;
__be32 rpc_stat;
- int auth_res;
+ int auth_res, rc;
__be32 *reply_statp;
rpc_stat = rpc_success;
@@ -1392,28 +1353,18 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);
/* Call the function that processes the request. */
- if (!process.dispatch) {
- if (!svc_generic_dispatch(rqstp, statp))
- goto release_dropit;
- if (*statp == rpc_garbage_args)
- goto err_garbage;
- } else {
- dprintk("svc: calling dispatcher\n");
- if (!process.dispatch(rqstp, statp))
- goto release_dropit; /* Release reply info */
- }
-
+ rc = process.dispatch(rqstp, statp);
+ if (procp->pc_release)
+ procp->pc_release(rqstp);
+ if (!rc)
+ goto dropit;
if (rqstp->rq_auth_stat != rpc_auth_ok)
- goto err_release_bad_auth;
+ goto err_bad_auth;
/* Check RPC status result */
if (*statp != rpc_success)
resv->iov_len = ((void*)statp) - resv->iov_base + 4;
- /* Release reply info */
- if (procp->pc_release)
- procp->pc_release(rqstp);
-
if (procp->pc_encode == NULL)
goto dropit;
@@ -1422,9 +1373,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto close_xprt;
return 1; /* Caller can now send it */
-release_dropit:
- if (procp->pc_release)
- procp->pc_release(rqstp);
dropit:
svc_authorise(rqstp); /* doesn't hurt to call this twice */
dprintk("svc: svc_process dropit\n");
@@ -1451,9 +1399,6 @@ err_bad_rpc:
svc_putnl(resv, 2);
goto sendit;
-err_release_bad_auth:
- if (procp->pc_release)
- procp->pc_release(rqstp);
err_bad_auth:
dprintk("svc: authentication failed (%d)\n",
be32_to_cpu(rqstp->rq_auth_stat));
@@ -1676,16 +1621,17 @@ EXPORT_SYMBOL_GPL(svc_encode_result_payload);
/**
* svc_fill_write_vector - Construct data argument for VFS write call
* @rqstp: svc_rqst to operate on
- * @pages: list of pages containing data payload
- * @first: buffer containing first section of write payload
- * @total: total number of bytes of write payload
+ * @payload: xdr_buf containing only the write data payload
*
* Fills in rqstp::rq_vec, and returns the number of elements.
*/
-unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
- struct kvec *first, size_t total)
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
+ struct xdr_buf *payload)
{
+ struct page **pages = payload->pages;
+ struct kvec *first = payload->head;
struct kvec *vec = rqstp->rq_vec;
+ size_t total = payload->len;
unsigned int i;
/* Some types of transport can present the write payload
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 6316bd2b8f37..1e99ba1b9d72 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -687,6 +687,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
set_current_state(TASK_RUNNING);
return -EINTR;
}
+ trace_svc_alloc_arg_err(pages);
schedule_timeout(msecs_to_jiffies(500));
}
rqstp->rq_page_end = &rqstp->rq_pages[pages];
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 9a6f17e18f73..2766dd21935b 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -109,8 +109,10 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
struct sock_xprt *sock;
ssize_t ret = -1;
- if (!xprt)
- return 0;
+ if (!xprt || !xprt_connected(xprt)) {
+ xprt_put(xprt);
+ return -ENOTCONN;
+ }
sock = container_of(xprt, struct sock_xprt, xprt);
if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
@@ -129,8 +131,10 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
ssize_t ret;
- if (!xprt)
- return 0;
+ if (!xprt || !xprt_connected(xprt)) {
+ xprt_put(xprt);
+ return -ENOTCONN;
+ }
ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n"
"max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n"
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index ca10ba2626f2..df194cc07035 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1633,7 +1633,7 @@ EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
* Sets up @subbuf to represent a portion of @xdr. The portion
* starts at the current offset in @xdr, and extends for a length
* of @nbytes. If this is successful, @xdr is advanced to the next
- * position following that portion.
+ * XDR data item following that portion.
*
* Return values:
* %true: @subbuf has been initialized, and @xdr has been advanced.
@@ -1642,29 +1642,31 @@ EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
unsigned int nbytes)
{
- unsigned int remaining, offset, len;
+ unsigned int start = xdr_stream_pos(xdr);
+ unsigned int remaining, len;
- if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes))
+ /* Extract @subbuf and bounds-check the fn arguments */
+ if (xdr_buf_subsegment(xdr->buf, subbuf, start, nbytes))
return false;
- if (subbuf->head[0].iov_len)
- if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len))
- return false;
-
- remaining = subbuf->page_len;
- offset = subbuf->page_base;
- while (remaining) {
- len = min_t(unsigned int, remaining, PAGE_SIZE) - offset;
-
+ /* Advance @xdr by @nbytes */
+ for (remaining = nbytes; remaining;) {
if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
return false;
- if (!__xdr_inline_decode(xdr, len))
- return false;
+ len = (char *)xdr->end - (char *)xdr->p;
+ if (remaining <= len) {
+ xdr->p = (__be32 *)((char *)xdr->p +
+ (remaining + xdr_pad_size(nbytes)));
+ break;
+ }
+
+ xdr->p = (__be32 *)((char *)xdr->p + len);
+ xdr->end = xdr->p;
remaining -= len;
- offset = 0;
}
+ xdr_stream_set_pos(xdr, start + nbytes);
return true;
}
EXPORT_SYMBOL_GPL(xdr_stream_subsegment);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index cfd681700d1a..a02de2bddb28 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -246,11 +246,9 @@ EXPORT_SYMBOL_GPL(xprt_find_transport_ident);
static void xprt_clear_locked(struct rpc_xprt *xprt)
{
xprt->snd_task = NULL;
- if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
- smp_mb__before_atomic();
- clear_bit(XPRT_LOCKED, &xprt->state);
- smp_mb__after_atomic();
- } else
+ if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ clear_bit_unlock(XPRT_LOCKED, &xprt->state);
+ else
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
}
@@ -737,6 +735,8 @@ static void xprt_autoclose(struct work_struct *work)
unsigned int pflags = memalloc_nofs_save();
trace_xprt_disconnect_auto(xprt);
+ xprt->connect_cookie++;
+ smp_mb__before_atomic();
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
@@ -767,7 +767,8 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done);
*/
static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt)
{
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ if (test_and_set_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ return;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
@@ -1603,15 +1604,14 @@ xprt_transmit(struct rpc_task *task)
{
struct rpc_rqst *next, *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
- int counter, status;
+ int status;
spin_lock(&xprt->queue_lock);
- counter = 0;
- while (!list_empty(&xprt->xmit_queue)) {
- if (++counter == 20)
+ for (;;) {
+ next = list_first_entry_or_null(&xprt->xmit_queue,
+ struct rpc_rqst, rq_xmit);
+ if (!next)
break;
- next = list_first_entry(&xprt->xmit_queue,
- struct rpc_rqst, rq_xmit);
xprt_pin_rqst(next);
spin_unlock(&xprt->queue_lock);
status = xprt_request_transmit(next, task);
@@ -1619,13 +1619,16 @@ xprt_transmit(struct rpc_task *task)
status = 0;
spin_lock(&xprt->queue_lock);
xprt_unpin_rqst(next);
- if (status == 0) {
- if (!xprt_request_data_received(task) ||
- test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
- continue;
- } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
- task->tk_status = status;
- break;
+ if (status < 0) {
+ if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ task->tk_status = status;
+ break;
+ }
+ /* Was @task transmitted, and has it received a reply? */
+ if (xprt_request_data_received(task) &&
+ !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ break;
+ cond_resched_lock(&xprt->queue_lock);
}
spin_unlock(&xprt->queue_lock);
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index f700b34a5bfd..ff699307e820 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -515,8 +515,8 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* a single ib_post_send() call.
*/
prev = &first;
- while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
-
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ do {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
@@ -533,7 +533,8 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*prev = last;
prev = &last->next;
- }
+ } while ((mr = rpcrdma_mr_pop(&req->rl_registered)));
+
mr = container_of(last, struct rpcrdma_mr, mr_invwr);
/* Strong send queue ordering guarantees that when the
@@ -617,8 +618,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* a single ib_post_send() call.
*/
prev = &first;
- while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
-
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ do {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
@@ -635,7 +636,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*prev = last;
prev = &last->next;
- }
+ } while ((mr = rpcrdma_mr_pop(&req->rl_registered)));
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
@@ -666,3 +667,38 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*/
rpcrdma_force_disconnect(ep);
}
+
+/**
+ * frwr_wp_create - Create an MR for padding Write chunks
+ * @r_xprt: transport resources to use
+ *
+ * Return 0 on success, negative errno on failure.
+ */
+int frwr_wp_create(struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
+ struct rpcrdma_mr_seg seg;
+ struct rpcrdma_mr *mr;
+
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
+ return -EAGAIN;
+ mr->mr_req = NULL;
+ ep->re_write_pad_mr = mr;
+
+ seg.mr_len = XDR_UNIT;
+ seg.mr_page = virt_to_page(ep->re_write_pad);
+ seg.mr_offset = offset_in_page(ep->re_write_pad);
+ if (IS_ERR(frwr_map(r_xprt, &seg, 1, true, xdr_zero, mr)))
+ return -EIO;
+ trace_xprtrdma_mr_fastreg(mr);
+
+ mr->mr_cqe.done = frwr_wc_fastreg;
+ mr->mr_regwr.wr.next = NULL;
+ mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
+ mr->mr_regwr.wr.num_sge = 0;
+ mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
+ mr->mr_regwr.wr.send_flags = 0;
+
+ return ib_post_send(ep->re_id->qp, &mr->mr_regwr.wr, NULL);
+}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c335c1361564..8035a983c8ce 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -255,15 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
page_base = 0;
}
- if (type == rpcrdma_readch)
- goto out;
-
- /* When encoding a Write chunk, some servers need to see an
- * extra segment for non-XDR-aligned Write chunks. The upper
- * layer provides space in the tail iovec that may be used
- * for this purpose.
- */
- if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup)
+ if (type == rpcrdma_readch || type == rpcrdma_writech)
goto out;
if (xdrbuf->tail[0].iov_len)
@@ -405,6 +397,7 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
enum rpcrdma_chunktype wtype)
{
struct xdr_stream *xdr = &req->rl_stream;
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct rpcrdma_mr_seg *seg;
struct rpcrdma_mr *mr;
int nsegs, nchunks;
@@ -443,6 +436,18 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
nsegs -= mr->mr_nents;
} while (nsegs);
+ if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) {
+ if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0)
+ return -EMSGSIZE;
+
+ trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr,
+ nsegs);
+ r_xprt->rx_stats.write_chunk_count++;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
+ nchunks++;
+ nsegs -= mr->mr_nents;
+ }
+
/* Update count of segments in this Write chunk */
*segcount = cpu_to_be32(nchunks);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 6be23ce7a93d..cf76a6ad127b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
- trace_svcrdma_wc_receive(wc, &ctxt->rc_cid);
if (wc->status != IB_WC_SUCCESS)
goto flushed;
+ trace_svcrdma_wc_recv(wc, &ctxt->rc_cid);
/* If receive posting fails, the connection is about to be
* lost anyway. The server will not be able to send a reply
@@ -345,7 +345,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
*/
if (rdma->sc_pending_recvs < rdma->sc_max_requests)
if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
- goto flushed;
+ goto dropped;
/* All wc fields are now known to be valid */
ctxt->rc_byte_len = wc->byte_len;
@@ -360,6 +360,11 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
return;
flushed:
+ if (wc->status == IB_WC_WR_FLUSH_ERR)
+ trace_svcrdma_wc_recv_flush(wc, &ctxt->rc_cid);
+ else
+ trace_svcrdma_wc_recv_err(wc, &ctxt->rc_cid);
+dropped:
svc_rdma_recv_ctxt_put(rdma, ctxt);
svc_xprt_deferred_close(&rdma->sc_xprt);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index e27433f08ca7..5f0155fdefc7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -155,6 +155,7 @@ struct svc_rdma_chunk_ctxt {
struct ib_cqe cc_cqe;
struct svcxprt_rdma *cc_rdma;
struct list_head cc_rwctxts;
+ ktime_t cc_posttime;
int cc_sqecount;
enum ib_wc_status cc_status;
struct completion cc_done;
@@ -267,7 +268,16 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
- trace_svcrdma_wc_write(wc, &cc->cc_cid);
+ switch (wc->status) {
+ case IB_WC_SUCCESS:
+ trace_svcrdma_wc_write(wc, &cc->cc_cid);
+ break;
+ case IB_WC_WR_FLUSH_ERR:
+ trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
+ break;
+ default:
+ trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
+ }
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
@@ -320,11 +330,22 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct svc_rdma_read_info *info;
- trace_svcrdma_wc_read(wc, &cc->cc_cid);
+ switch (wc->status) {
+ case IB_WC_SUCCESS:
+ info = container_of(cc, struct svc_rdma_read_info, ri_cc);
+ trace_svcrdma_wc_read(wc, &cc->cc_cid, info->ri_totalbytes,
+ cc->cc_posttime);
+ break;
+ case IB_WC_WR_FLUSH_ERR:
+ trace_svcrdma_wc_read_flush(wc, &cc->cc_cid);
+ break;
+ default:
+ trace_svcrdma_wc_read_err(wc, &cc->cc_cid);
+ }
- svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+ svc_rdma_wake_send_waiters(cc->cc_rdma, cc->cc_sqecount);
cc->cc_status = wc->status;
complete(&cc->cc_done);
return;
@@ -363,6 +384,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
do {
if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) {
+ cc->cc_posttime = ktime_get();
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
if (ret)
break;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 599021b2391d..22a871e6fe4d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -280,13 +280,21 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_send_ctxt *ctxt =
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
- trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
-
svc_rdma_wake_send_waiters(rdma, 1);
complete(&ctxt->sc_done);
if (unlikely(wc->status != IB_WC_SUCCESS))
- svc_xprt_deferred_close(&rdma->sc_xprt);
+ goto flushed;
+
+ trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ return;
+
+flushed:
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid);
+ else
+ trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index aaec3c9be8db..3d3673ba9e1e 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -205,14 +205,12 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
unsigned int rsize, wsize;
/* Default settings for RPC-over-RDMA Version One */
- ep->re_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
if (pmsg &&
pmsg->cp_magic == rpcrdma_cmp_magic &&
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
- ep->re_implicit_roundup = true;
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
}
@@ -551,6 +549,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
goto out;
}
rpcrdma_mrs_create(r_xprt);
+ frwr_wp_create(r_xprt);
out:
trace_xprtrdma_connect(r_xprt, rc);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index d91f54eae00b..c79f92eeda76 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -68,13 +68,14 @@
/*
* RDMA Endpoint -- connection endpoint details
*/
+struct rpcrdma_mr;
struct rpcrdma_ep {
struct kref re_kref;
struct rdma_cm_id *re_id;
struct ib_pd *re_pd;
unsigned int re_max_rdma_segs;
unsigned int re_max_fr_depth;
- bool re_implicit_roundup;
+ struct rpcrdma_mr *re_write_pad_mr;
enum ib_mr_type re_mrtype;
struct completion re_done;
unsigned int re_send_count;
@@ -97,6 +98,8 @@ struct rpcrdma_ep {
unsigned int re_inline_recv; /* negotiated */
atomic_t re_completion_ids;
+
+ char re_write_pad[XDR_UNIT];
};
/* Pre-allocate extra Work Requests for handling reverse-direction
@@ -535,6 +538,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
+int frwr_wp_create(struct rpcrdma_xprt *r_xprt);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 04f1b78bcbca..ae48c9c84ee1 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1134,6 +1134,7 @@ static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
{
+ xprt->connect_cookie++;
smp_mb__before_atomic();
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
@@ -1153,14 +1154,13 @@ static void xs_error_report(struct sock *sk)
struct sock_xprt *transport;
struct rpc_xprt *xprt;
- read_lock_bh(&sk->sk_callback_lock);
if (!(xprt = xprt_from_sock(sk)))
- goto out;
+ return;
transport = container_of(xprt, struct sock_xprt, xprt);
transport->xprt_err = -sk->sk_err;
if (transport->xprt_err == 0)
- goto out;
+ return;
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -transport->xprt_err);
trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err);
@@ -1168,8 +1168,6 @@ static void xs_error_report(struct sock *sk)
/* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */
smp_mb__before_atomic();
xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
- out:
- read_unlock_bh(&sk->sk_callback_lock);
}
static void xs_reset_transport(struct sock_xprt *transport)
@@ -1188,7 +1186,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
kernel_sock_shutdown(sock, SHUT_RDWR);
mutex_lock(&transport->recv_mutex);
- write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
transport->inet = NULL;
transport->sock = NULL;
transport->file = NULL;
@@ -1197,10 +1195,10 @@ static void xs_reset_transport(struct sock_xprt *transport)
xs_restore_old_callbacks(transport, sk);
xprt_clear_connected(xprt);
- write_unlock_bh(&sk->sk_callback_lock);
xs_sock_reset_connection_flags(xprt);
/* Reset stream record info */
xs_stream_reset_connect(transport);
+ release_sock(sk);
mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
@@ -1364,7 +1362,6 @@ static void xs_data_ready(struct sock *sk)
{
struct rpc_xprt *xprt;
- read_lock_bh(&sk->sk_callback_lock);
dprintk("RPC: xs_data_ready...\n");
xprt = xprt_from_sock(sk);
if (xprt != NULL) {
@@ -1379,7 +1376,6 @@ static void xs_data_ready(struct sock *sk)
if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
queue_work(xprtiod_workqueue, &transport->recv_worker);
}
- read_unlock_bh(&sk->sk_callback_lock);
}
/*
@@ -1408,9 +1404,8 @@ static void xs_tcp_state_change(struct sock *sk)
struct rpc_xprt *xprt;
struct sock_xprt *transport;
- read_lock_bh(&sk->sk_callback_lock);
if (!(xprt = xprt_from_sock(sk)))
- goto out;
+ return;
dprintk("RPC: xs_tcp_state_change client %p...\n", xprt);
dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n",
sk->sk_state, xprt_connected(xprt),
@@ -1471,8 +1466,6 @@ static void xs_tcp_state_change(struct sock *sk)
/* Trigger the socket release */
xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
}
- out:
- read_unlock_bh(&sk->sk_callback_lock);
}
static void xs_write_space(struct sock *sk)
@@ -1511,13 +1504,9 @@ out:
*/
static void xs_udp_write_space(struct sock *sk)
{
- read_lock_bh(&sk->sk_callback_lock);
-
/* from net/core/sock.c:sock_def_write_space */
if (sock_writeable(sk))
xs_write_space(sk);
-
- read_unlock_bh(&sk->sk_callback_lock);
}
/**
@@ -1532,13 +1521,9 @@ static void xs_udp_write_space(struct sock *sk)
*/
static void xs_tcp_write_space(struct sock *sk)
{
- read_lock_bh(&sk->sk_callback_lock);
-
/* from net/core/stream.c:sk_stream_write_space */
if (sk_stream_is_writeable(sk))
xs_write_space(sk);
-
- read_unlock_bh(&sk->sk_callback_lock);
}
static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
@@ -1833,7 +1818,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
if (!transport->inet) {
struct sock *sk = sock->sk;
- write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
xs_save_old_callbacks(transport, sk);
@@ -1849,7 +1834,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
transport->sock = sock;
transport->inet = sk;
- write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
}
xs_stream_start_connect(transport);
@@ -2031,7 +2016,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
if (!transport->inet) {
struct sock *sk = sock->sk;
- write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
xs_save_old_callbacks(transport, sk);
@@ -2048,7 +2033,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_set_memalloc(xprt);
- write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
}
xs_udp_do_set_buffer_size(xprt);
@@ -2174,7 +2159,6 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- int ret = -ENOTCONN;
if (!transport->inet) {
struct sock *sk = sock->sk;
@@ -2194,7 +2178,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_tcp_set_socket_timeouts(xprt, sock);
tcp_sock_set_nodelay(sk);
- write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
xs_save_old_callbacks(transport, sk);
@@ -2214,11 +2198,11 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
transport->sock = sock;
transport->inet = sk;
- write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
}
if (!xprt_bound(xprt))
- goto out;
+ return -ENOTCONN;
xs_set_memalloc(xprt);
@@ -2226,22 +2210,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
/* Tell the socket layer to start connecting... */
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
- ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
- switch (ret) {
- case 0:
- xs_set_srcport(transport, sock);
- fallthrough;
- case -EINPROGRESS:
- /* SYN_SENT! */
- if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
- xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
- break;
- case -EADDRNOTAVAIL:
- /* Source port number is unavailable. Try a new one! */
- transport->srcport = 0;
- }
-out:
- return ret;
+ return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
}
/**
@@ -2256,14 +2225,14 @@ static void xs_tcp_setup_socket(struct work_struct *work)
container_of(work, struct sock_xprt, connect_worker.work);
struct socket *sock = transport->sock;
struct rpc_xprt *xprt = &transport->xprt;
- int status = -EIO;
+ int status;
if (!sock) {
sock = xs_create_sock(xprt, transport,
xs_addr(xprt)->sa_family, SOCK_STREAM,
IPPROTO_TCP, true);
if (IS_ERR(sock)) {
- status = PTR_ERR(sock);
+ xprt_wake_pending_tasks(xprt, PTR_ERR(sock));
goto out;
}
}
@@ -2280,21 +2249,21 @@ static void xs_tcp_setup_socket(struct work_struct *work)
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
switch (status) {
- default:
- printk("%s: connect returned unhandled error %d\n",
- __func__, status);
- fallthrough;
- case -EADDRNOTAVAIL:
- /* We're probably in TIME_WAIT. Get rid of existing socket,
- * and retry
- */
- xs_tcp_force_close(xprt);
- break;
case 0:
+ xs_set_srcport(transport, sock);
+ fallthrough;
case -EINPROGRESS:
+ /* SYN_SENT! */
+ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ fallthrough;
case -EALREADY:
- xprt_unlock_connect(xprt, transport);
- return;
+ goto out_unlock;
+ case -EADDRNOTAVAIL:
+ /* Source port number is unavailable. Try a new one! */
+ transport->srcport = 0;
+ status = -EAGAIN;
+ break;
case -EINVAL:
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
@@ -2306,18 +2275,22 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
- /* xs_tcp_force_close() wakes tasks with a fixed error code.
- * We need to wake them first to ensure the correct error code.
- */
- xprt_wake_pending_tasks(xprt, status);
- xs_tcp_force_close(xprt);
- goto out;
+ break;
+ default:
+ printk("%s: connect returned unhandled error %d\n",
+ __func__, status);
+ status = -EAGAIN;
}
- status = -EAGAIN;
+
+ /* xs_tcp_force_close() wakes tasks with a fixed error code.
+ * We need to wake them first to ensure the correct error code.
+ */
+ xprt_wake_pending_tasks(xprt, status);
+ xs_tcp_force_close(xprt);
out:
xprt_clear_connecting(xprt);
+out_unlock:
xprt_unlock_connect(xprt, transport);
- xprt_wake_pending_tasks(xprt, status);
}
/**
@@ -2341,7 +2314,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
- if (transport->sock != NULL) {
+ if (transport->sock != NULL && !xprt_connecting(xprt)) {
dprintk("RPC: xs_connect delayed xprt %p for %lu "
"seconds\n",
xprt, xprt->reestablish_timeout / HZ);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 0b2c18efc079..83460470e883 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -428,17 +428,17 @@ switchdev_lower_dev_find(struct net_device *dev,
return switchdev_priv.lower_dev;
}
-static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
- const struct net_device *orig_dev,
+static int __switchdev_handle_fdb_event_to_device(struct net_device *dev,
+ struct net_device *orig_dev, unsigned long event,
const struct switchdev_notifier_fdb_info *fdb_info,
bool (*check_cb)(const struct net_device *dev),
bool (*foreign_dev_check_cb)(const struct net_device *dev,
const struct net_device *foreign_dev),
- int (*add_cb)(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
+ int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info),
- int (*lag_add_cb)(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
+ int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info))
{
const struct switchdev_notifier_info *info = &fdb_info->info;
@@ -447,17 +447,17 @@ static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
int err = -EOPNOTSUPP;
if (check_cb(dev))
- return add_cb(dev, orig_dev, info->ctx, fdb_info);
+ return mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
if (netif_is_lag_master(dev)) {
if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
goto maybe_bridged_with_us;
/* This is a LAG interface that we offload */
- if (!lag_add_cb)
+ if (!lag_mod_cb)
return -EOPNOTSUPP;
- return lag_add_cb(dev, orig_dev, info->ctx, fdb_info);
+ return lag_mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
}
/* Recurse through lower interfaces in case the FDB entry is pointing
@@ -481,10 +481,10 @@ static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
foreign_dev_check_cb))
continue;
- err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev,
- fdb_info, check_cb,
- foreign_dev_check_cb,
- add_cb, lag_add_cb);
+ err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev,
+ event, fdb_info, check_cb,
+ foreign_dev_check_cb,
+ mod_cb, lag_mod_cb);
if (err && err != -EOPNOTSUPP)
return err;
}
@@ -503,140 +503,34 @@ maybe_bridged_with_us:
if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
return 0;
- return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info,
- check_cb, foreign_dev_check_cb,
- add_cb, lag_add_cb);
+ return __switchdev_handle_fdb_event_to_device(br, orig_dev, event, fdb_info,
+ check_cb, foreign_dev_check_cb,
+ mod_cb, lag_mod_cb);
}
-int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
const struct switchdev_notifier_fdb_info *fdb_info,
bool (*check_cb)(const struct net_device *dev),
bool (*foreign_dev_check_cb)(const struct net_device *dev,
const struct net_device *foreign_dev),
- int (*add_cb)(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
+ int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info),
- int (*lag_add_cb)(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
+ int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info))
{
int err;
- err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info,
- check_cb,
- foreign_dev_check_cb,
- add_cb, lag_add_cb);
+ err = __switchdev_handle_fdb_event_to_device(dev, dev, event, fdb_info,
+ check_cb, foreign_dev_check_cb,
+ mod_cb, lag_mod_cb);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
-EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device);
-
-static int __switchdev_handle_fdb_del_to_device(struct net_device *dev,
- const struct net_device *orig_dev,
- const struct switchdev_notifier_fdb_info *fdb_info,
- bool (*check_cb)(const struct net_device *dev),
- bool (*foreign_dev_check_cb)(const struct net_device *dev,
- const struct net_device *foreign_dev),
- int (*del_cb)(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info),
- int (*lag_del_cb)(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info))
-{
- const struct switchdev_notifier_info *info = &fdb_info->info;
- struct net_device *br, *lower_dev;
- struct list_head *iter;
- int err = -EOPNOTSUPP;
-
- if (check_cb(dev))
- return del_cb(dev, orig_dev, info->ctx, fdb_info);
-
- if (netif_is_lag_master(dev)) {
- if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
- goto maybe_bridged_with_us;
-
- /* This is a LAG interface that we offload */
- if (!lag_del_cb)
- return -EOPNOTSUPP;
-
- return lag_del_cb(dev, orig_dev, info->ctx, fdb_info);
- }
-
- /* Recurse through lower interfaces in case the FDB entry is pointing
- * towards a bridge device.
- */
- if (netif_is_bridge_master(dev)) {
- if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
- return 0;
-
- /* This is a bridge interface that we offload */
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- /* Do not propagate FDB entries across bridges */
- if (netif_is_bridge_master(lower_dev))
- continue;
-
- /* Bridge ports might be either us, or LAG interfaces
- * that we offload.
- */
- if (!check_cb(lower_dev) &&
- !switchdev_lower_dev_find(lower_dev, check_cb,
- foreign_dev_check_cb))
- continue;
-
- err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev,
- fdb_info, check_cb,
- foreign_dev_check_cb,
- del_cb, lag_del_cb);
- if (err && err != -EOPNOTSUPP)
- return err;
- }
-
- return 0;
- }
-
-maybe_bridged_with_us:
- /* Event is neither on a bridge nor a LAG. Check whether it is on an
- * interface that is in a bridge with us.
- */
- br = netdev_master_upper_dev_get_rcu(dev);
- if (!br || !netif_is_bridge_master(br))
- return 0;
-
- if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
- return 0;
-
- return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info,
- check_cb, foreign_dev_check_cb,
- del_cb, lag_del_cb);
-}
-
-int switchdev_handle_fdb_del_to_device(struct net_device *dev,
- const struct switchdev_notifier_fdb_info *fdb_info,
- bool (*check_cb)(const struct net_device *dev),
- bool (*foreign_dev_check_cb)(const struct net_device *dev,
- const struct net_device *foreign_dev),
- int (*del_cb)(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info),
- int (*lag_del_cb)(struct net_device *dev,
- const struct net_device *orig_dev, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info))
-{
- int err;
-
- err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info,
- check_cb,
- foreign_dev_check_cb,
- del_cb, lag_del_cb);
- if (err == -EOPNOTSUPP)
- err = 0;
-
- return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device);
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_event_to_device);
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index f6cb0d4d114c..4b45ed631eb8 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -144,7 +144,7 @@ static void ensure_safe_net_sysctl(struct net *net, const char *path,
addr = (unsigned long)ent->data;
if (is_module_address(addr))
where = "module";
- else if (core_kernel_data(addr))
+ else if (is_kernel_core_data(addr))
where = "kernel";
else
continue;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 443f8e5b9477..60bc74b76adc 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -462,7 +462,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
b->bcast_addr.media_id = b->media->type_id;
b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
b->mtu = dev->mtu;
- b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr);
+ b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr);
rcu_assign_pointer(dev->tipc_ptr, b);
return 0;
}
@@ -703,7 +703,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
break;
case NETDEV_CHANGEADDR:
b->media->raw2addr(b, &b->addr,
- (char *)dev->dev_addr);
+ (const char *)dev->dev_addr);
tipc_reset_bearer(net, b);
break;
case NETDEV_UNREGISTER:
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 57c6a1a719e2..490ad6e5f7a3 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -117,7 +117,7 @@ struct tipc_media {
char *msg);
int (*raw2addr)(struct tipc_bearer *b,
struct tipc_media_addr *addr,
- char *raw);
+ const char *raw);
u32 priority;
u32 tolerance;
u32 min_win;
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index c9391d38de85..dc60c32bb70d 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -2285,43 +2285,53 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
u16 key_gen = msg_key_gen(hdr);
u16 size = msg_data_sz(hdr);
u8 *data = msg_data(hdr);
+ unsigned int keylen;
+
+ /* Verify whether the size can exist in the packet */
+ if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) {
+ pr_debug("%s: message data size is too small\n", rx->name);
+ goto exit;
+ }
+
+ keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+
+ /* Verify the supplied size values */
+ if (unlikely(size != keylen + sizeof(struct tipc_aead_key) ||
+ keylen > TIPC_AEAD_KEY_SIZE_MAX)) {
+ pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name);
+ goto exit;
+ }
spin_lock(&rx->lock);
if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) {
pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name,
rx->skey, key_gen, rx->key_gen);
- goto exit;
+ goto exit_unlock;
}
/* Allocate memory for the key */
skey = kmalloc(size, GFP_ATOMIC);
if (unlikely(!skey)) {
pr_err("%s: unable to allocate memory for skey\n", rx->name);
- goto exit;
+ goto exit_unlock;
}
/* Copy key from msg data */
- skey->keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+ skey->keylen = keylen;
memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME);
memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32),
skey->keylen);
- /* Sanity check */
- if (unlikely(size != tipc_aead_key_size(skey))) {
- kfree(skey);
- skey = NULL;
- goto exit;
- }
-
rx->key_gen = key_gen;
rx->skey_mode = msg_key_mode(hdr);
rx->skey = skey;
rx->nokey = 0;
mb(); /* for nokey flag */
-exit:
+exit_unlock:
spin_unlock(&rx->lock);
+exit:
/* Schedule the key attaching on this crypto */
if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0)))
return true;
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index c68019697cfe..cb0d185e06af 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -60,7 +60,7 @@ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr)
/* Convert raw mac address format to media addr format */
static int tipc_eth_raw2addr(struct tipc_bearer *b,
struct tipc_media_addr *addr,
- char *msg)
+ const char *msg)
{
memset(addr, 0, sizeof(*addr));
ether_addr_copy(addr->value, msg);
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 7aa9ff88458d..b9ad0434c3cd 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -67,7 +67,7 @@ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr)
/* Convert raw InfiniBand address format to media addr format */
static int tipc_ib_raw2addr(struct tipc_bearer *b,
struct tipc_media_addr *addr,
- char *msg)
+ const char *msg)
{
memset(addr, 0, sizeof(*addr));
memcpy(addr->value, msg, INFINIBAND_ALEN);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a0a27d87f631..ad570c2450be 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
u32 dport, struct sk_buff_head *xmitq)
{
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies + usecs_to_jiffies(20000);
struct sk_buff *skb;
unsigned int lim;
atomic_t *dcnt;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index fde56ff49163..acfba9f1ba72 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -421,6 +421,88 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
rc = -EFAULT;
break;
}
+ case TLS_CIPHER_AES_CCM_128: {
+ struct tls12_crypto_info_aes_ccm_128 *aes_ccm_128 =
+ container_of(crypto_info,
+ struct tls12_crypto_info_aes_ccm_128, info);
+
+ if (len != sizeof(*aes_ccm_128)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ lock_sock(sk);
+ memcpy(aes_ccm_128->iv,
+ cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE,
+ TLS_CIPHER_AES_CCM_128_IV_SIZE);
+ memcpy(aes_ccm_128->rec_seq, cctx->rec_seq,
+ TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE);
+ release_sock(sk);
+ if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128)))
+ rc = -EFAULT;
+ break;
+ }
+ case TLS_CIPHER_CHACHA20_POLY1305: {
+ struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305 =
+ container_of(crypto_info,
+ struct tls12_crypto_info_chacha20_poly1305,
+ info);
+
+ if (len != sizeof(*chacha20_poly1305)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ lock_sock(sk);
+ memcpy(chacha20_poly1305->iv,
+ cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE,
+ TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE);
+ memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq,
+ TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE);
+ release_sock(sk);
+ if (copy_to_user(optval, chacha20_poly1305,
+ sizeof(*chacha20_poly1305)))
+ rc = -EFAULT;
+ break;
+ }
+ case TLS_CIPHER_SM4_GCM: {
+ struct tls12_crypto_info_sm4_gcm *sm4_gcm_info =
+ container_of(crypto_info,
+ struct tls12_crypto_info_sm4_gcm, info);
+
+ if (len != sizeof(*sm4_gcm_info)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ lock_sock(sk);
+ memcpy(sm4_gcm_info->iv,
+ cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE,
+ TLS_CIPHER_SM4_GCM_IV_SIZE);
+ memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq,
+ TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE);
+ release_sock(sk);
+ if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info)))
+ rc = -EFAULT;
+ break;
+ }
+ case TLS_CIPHER_SM4_CCM: {
+ struct tls12_crypto_info_sm4_ccm *sm4_ccm_info =
+ container_of(crypto_info,
+ struct tls12_crypto_info_sm4_ccm, info);
+
+ if (len != sizeof(*sm4_ccm_info)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ lock_sock(sk);
+ memcpy(sm4_ccm_info->iv,
+ cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE,
+ TLS_CIPHER_SM4_CCM_IV_SIZE);
+ memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq,
+ TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE);
+ release_sock(sk);
+ if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info)))
+ rc = -EFAULT;
+ break;
+ }
default:
rc = -EINVAL;
}
@@ -524,6 +606,12 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
case TLS_CIPHER_CHACHA20_POLY1305:
optsize = sizeof(struct tls12_crypto_info_chacha20_poly1305);
break;
+ case TLS_CIPHER_SM4_GCM:
+ optsize = sizeof(struct tls12_crypto_info_sm4_gcm);
+ break;
+ case TLS_CIPHER_SM4_CCM:
+ optsize = sizeof(struct tls12_crypto_info_sm4_ccm);
+ break;
default:
rc = -EINVAL;
goto err_crypto_info;
@@ -681,12 +769,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg;
- prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read;
+ prot[TLS_BASE][TLS_SW].sock_is_readable = tls_sw_sock_is_readable;
prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close;
prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg;
- prot[TLS_SW][TLS_SW].stream_memory_read = tls_sw_stream_read;
+ prot[TLS_SW][TLS_SW].sock_is_readable = tls_sw_sock_is_readable;
prot[TLS_SW][TLS_SW].close = tls_sk_proto_close;
#ifdef CONFIG_TLS_DEVICE
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 4feb95e34b64..d81564078557 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -35,6 +35,7 @@
* SOFTWARE.
*/
+#include <linux/bug.h>
#include <linux/sched/signal.h>
#include <linux/module.h>
#include <linux/splice.h>
@@ -43,6 +44,14 @@
#include <net/strparser.h>
#include <net/tls.h>
+noinline void tls_err_abort(struct sock *sk, int err)
+{
+ WARN_ON_ONCE(err >= 0);
+ /* sk->sk_err should contain a positive error code. */
+ sk->sk_err = -err;
+ sk_error_report(sk);
+}
+
static int __skb_nsg(struct sk_buff *skb, int offset, int len,
unsigned int recursion_level)
{
@@ -419,7 +428,7 @@ int tls_tx_records(struct sock *sk, int flags)
tx_err:
if (rc < 0 && rc != -EAGAIN)
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
return rc;
}
@@ -450,7 +459,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
/* If err is already set on socket, return the same code */
if (sk->sk_err) {
- ctx->async_wait.err = sk->sk_err;
+ ctx->async_wait.err = -sk->sk_err;
} else {
ctx->async_wait.err = err;
tls_err_abort(sk, err);
@@ -498,9 +507,15 @@ static int tls_do_encryption(struct sock *sk,
int rc, iv_offset = 0;
/* For CCM based ciphers, first byte of IV is a constant */
- if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) {
+ switch (prot->cipher_type) {
+ case TLS_CIPHER_AES_CCM_128:
rec->iv_data[0] = TLS_AES_CCM_IV_B0_BYTE;
iv_offset = 1;
+ break;
+ case TLS_CIPHER_SM4_CCM:
+ rec->iv_data[0] = TLS_SM4_CCM_IV_B0_BYTE;
+ iv_offset = 1;
+ break;
}
memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv,
@@ -763,7 +778,7 @@ static int tls_push_record(struct sock *sk, int flags,
msg_pl->sg.size + prot->tail_size, i);
if (rc < 0) {
if (rc != -EINPROGRESS) {
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
if (split) {
tls_ctx->pending_open_record_frags = true;
tls_merge_open_record(sk, rec, tmp, orig_end);
@@ -1457,10 +1472,16 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
aad = (u8 *)(sgout + n_sgout);
iv = aad + prot->aad_size;
- /* For CCM based ciphers, first byte of nonce+iv is always '2' */
- if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) {
- iv[0] = 2;
+ /* For CCM based ciphers, first byte of nonce+iv is a constant */
+ switch (prot->cipher_type) {
+ case TLS_CIPHER_AES_CCM_128:
+ iv[0] = TLS_AES_CCM_IV_B0_BYTE;
iv_offset = 1;
+ break;
+ case TLS_CIPHER_SM4_CCM:
+ iv[0] = TLS_SM4_CCM_IV_B0_BYTE;
+ iv_offset = 1;
+ break;
}
/* Prepare IV */
@@ -1827,7 +1848,7 @@ int tls_sw_recvmsg(struct sock *sk,
err = decrypt_skb_update(sk, skb, &msg->msg_iter,
&chunk, &zc, async_capable);
if (err < 0 && err != -EINPROGRESS) {
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
goto recv_end;
}
@@ -2007,7 +2028,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
}
if (err < 0) {
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
goto splice_read_end;
}
ctx->decrypted = 1;
@@ -2026,7 +2047,7 @@ splice_read_end:
return copied ? : err;
}
-bool tls_sw_stream_read(const struct sock *sk)
+bool tls_sw_sock_is_readable(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -2424,6 +2445,40 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
cipher_name = "rfc7539(chacha20,poly1305)";
break;
}
+ case TLS_CIPHER_SM4_GCM: {
+ struct tls12_crypto_info_sm4_gcm *sm4_gcm_info;
+
+ sm4_gcm_info = (void *)crypto_info;
+ nonce_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
+ tag_size = TLS_CIPHER_SM4_GCM_TAG_SIZE;
+ iv_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
+ iv = sm4_gcm_info->iv;
+ rec_seq_size = TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE;
+ rec_seq = sm4_gcm_info->rec_seq;
+ keysize = TLS_CIPHER_SM4_GCM_KEY_SIZE;
+ key = sm4_gcm_info->key;
+ salt = sm4_gcm_info->salt;
+ salt_size = TLS_CIPHER_SM4_GCM_SALT_SIZE;
+ cipher_name = "gcm(sm4)";
+ break;
+ }
+ case TLS_CIPHER_SM4_CCM: {
+ struct tls12_crypto_info_sm4_ccm *sm4_ccm_info;
+
+ sm4_ccm_info = (void *)crypto_info;
+ nonce_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
+ tag_size = TLS_CIPHER_SM4_CCM_TAG_SIZE;
+ iv_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
+ iv = sm4_ccm_info->iv;
+ rec_seq_size = TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE;
+ rec_seq = sm4_ccm_info->rec_seq;
+ keysize = TLS_CIPHER_SM4_CCM_KEY_SIZE;
+ key = sm4_ccm_info->key;
+ salt = sm4_ccm_info->salt;
+ salt_size = TLS_CIPHER_SM4_CCM_SALT_SIZE;
+ cipher_name = "ccm(sm4)";
+ break;
+ }
default:
rc = -EINVAL;
goto free_priv;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index eb47b9de2380..78e08e82c08c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -608,20 +608,42 @@ static void unix_release_sock(struct sock *sk, int embrion)
static void init_peercred(struct sock *sk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(task_tgid(current));
sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ if (sk < peersk) {
+ spin_lock(&sk->sk_peer_lock);
+ spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&peersk->sk_peer_lock);
+ spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ }
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+ spin_unlock(&sk->sk_peer_lock);
+ spin_unlock(&peersk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
@@ -806,7 +828,7 @@ static void unix_unhash(struct sock *sk)
}
struct proto unix_dgram_proto = {
- .name = "UNIX-DGRAM",
+ .name = "UNIX",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
.close = unix_close,
@@ -828,20 +850,25 @@ struct proto unix_stream_proto = {
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
- struct sock *sk = NULL;
struct unix_sock *u;
+ struct sock *sk;
+ int err;
atomic_long_inc(&unix_nr_socks);
- if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
- goto out;
+ if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
+ err = -ENFILE;
+ goto err;
+ }
if (type == SOCK_STREAM)
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
else /*dgram and seqpacket */
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
- if (!sk)
- goto out;
+ if (!sk) {
+ err = -ENOMEM;
+ goto err;
+ }
sock_init_data(sock, sk);
@@ -861,20 +888,23 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
memset(&u->scm_stat, 0, sizeof(struct scm_stat));
unix_insert_socket(unix_sockets_unbound(sk), sk);
-out:
- if (sk == NULL)
- atomic_long_dec(&unix_nr_socks);
- else {
- local_bh_disable();
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- local_bh_enable();
- }
+
+ local_bh_disable();
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ local_bh_enable();
+
return sk;
+
+err:
+ atomic_long_dec(&unix_nr_socks);
+ return ERR_PTR(err);
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ struct sock *sk;
+
if (protocol && protocol != PF_UNIX)
return -EPROTONOSUPPORT;
@@ -901,7 +931,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
+ sk = unix_create1(net, sock, kern, sock->type);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
+
+ return 0;
}
static int unix_release(struct socket *sock)
@@ -1314,12 +1348,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
we will have to recheck all again in any case.
*/
- err = -ENOMEM;
-
/* create new sock for complete connection */
newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
- if (newsk == NULL)
+ if (IS_ERR(newsk)) {
+ err = PTR_ERR(newsk);
+ newsk = NULL;
goto out;
+ }
+
+ err = -ENOMEM;
/* Allocate skb for sending to listening sock */
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
@@ -2845,6 +2882,9 @@ static int unix_shutdown(struct socket *sock, int mode)
unix_state_lock(sk);
sk->sk_shutdown |= mode;
+ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+ mode == SHUTDOWN_MASK)
+ sk->sk_state = TCP_CLOSE;
other = unix_peer(sk);
if (other)
sock_hold(other);
@@ -2867,12 +2907,10 @@ static int unix_shutdown(struct socket *sock, int mode)
other->sk_shutdown |= peer_mode;
unix_state_unlock(other);
other->sk_state_change(other);
- if (peer_mode == SHUTDOWN_MASK) {
+ if (peer_mode == SHUTDOWN_MASK)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
- other->sk_state = TCP_CLOSE;
- } else if (peer_mode & RCV_SHUTDOWN) {
+ else if (peer_mode & RCV_SHUTDOWN)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
- }
}
if (other)
sock_put(other);
@@ -3014,6 +3052,8 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
/* readable? */
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
@@ -3053,6 +3093,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
/* readable? */
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if (sk->sk_type == SOCK_SEQPACKET) {
@@ -3073,7 +3115,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
other = unix_peer(sk);
if (other && unix_peer(other) != sk &&
- unix_recvq_full(other) &&
+ unix_recvq_full_lockless(other) &&
unix_dgram_peer_wake_me(sk, other))
writable = 0;
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index b927e2baae50..452376c6f419 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -102,6 +102,7 @@ static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = unix_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
}
static void unix_stream_bpf_rebuild_protos(struct proto *prot,
@@ -110,6 +111,7 @@ static void unix_stream_bpf_rebuild_protos(struct proto *prot,
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = unix_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
prot->unhash = sock_map_unhash;
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index e2c0cfb334d2..ed0df839c38c 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1322,6 +1322,8 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
* non-blocking call.
*/
err = -EALREADY;
+ if (flags & O_NONBLOCK)
+ goto out;
break;
default:
if ((sk->sk_state == TCP_LISTEN) ||
@@ -1614,13 +1616,18 @@ static int vsock_connectible_setsockopt(struct socket *sock,
vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
break;
- case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
- struct __kernel_old_timeval tv;
- COPY_IN(tv);
+ case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW:
+ case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: {
+ struct __kernel_sock_timeval tv;
+
+ err = sock_copy_user_timeval(&tv, optval, optlen,
+ optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
+ if (err)
+ break;
if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
vsk->connect_timeout = tv.tv_sec * HZ +
- DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ));
+ DIV_ROUND_UP((unsigned long)tv.tv_usec, (USEC_PER_SEC / HZ));
if (vsk->connect_timeout == 0)
vsk->connect_timeout =
VSOCK_DEFAULT_CONNECT_TIMEOUT;
@@ -1648,68 +1655,59 @@ static int vsock_connectible_getsockopt(struct socket *sock,
char __user *optval,
int __user *optlen)
{
- int err;
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ union {
+ u64 val64;
+ struct old_timeval32 tm32;
+ struct __kernel_old_timeval tm;
+ struct __kernel_sock_timeval stm;
+ } v;
+
+ int lv = sizeof(v.val64);
int len;
- struct sock *sk;
- struct vsock_sock *vsk;
- u64 val;
if (level != AF_VSOCK)
return -ENOPROTOOPT;
- err = get_user(len, optlen);
- if (err != 0)
- return err;
-
-#define COPY_OUT(_v) \
- do { \
- if (len < sizeof(_v)) \
- return -EINVAL; \
- \
- len = sizeof(_v); \
- if (copy_to_user(optval, &_v, len) != 0) \
- return -EFAULT; \
- \
- } while (0)
+ if (get_user(len, optlen))
+ return -EFAULT;
- err = 0;
- sk = sock->sk;
- vsk = vsock_sk(sk);
+ memset(&v, 0, sizeof(v));
switch (optname) {
case SO_VM_SOCKETS_BUFFER_SIZE:
- val = vsk->buffer_size;
- COPY_OUT(val);
+ v.val64 = vsk->buffer_size;
break;
case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
- val = vsk->buffer_max_size;
- COPY_OUT(val);
+ v.val64 = vsk->buffer_max_size;
break;
case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
- val = vsk->buffer_min_size;
- COPY_OUT(val);
+ v.val64 = vsk->buffer_min_size;
break;
- case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
- struct __kernel_old_timeval tv;
- tv.tv_sec = vsk->connect_timeout / HZ;
- tv.tv_usec =
- (vsk->connect_timeout -
- tv.tv_sec * HZ) * (1000000 / HZ);
- COPY_OUT(tv);
+ case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW:
+ case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD:
+ lv = sock_get_timeout(vsk->connect_timeout, &v,
+ optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
break;
- }
+
default:
return -ENOPROTOOPT;
}
- err = put_user(len, optlen);
- if (err != 0)
+ if (len < lv)
+ return -EINVAL;
+ if (len > lv)
+ len = lv;
+ if (copy_to_user(optval, &v, len))
return -EFAULT;
-#undef COPY_OUT
+ if (put_user(len, optlen))
+ return -EFAULT;
return 0;
}
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index af590ae606b6..756e7de7e33f 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -26,7 +26,7 @@ endif
$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
@$(kecho) " GEN $@"
- @(echo '#include "reg.h"'; \
+ $(Q)(echo '#include "reg.h"'; \
echo 'const u8 shipped_regdb_certs[] = {'; \
echo | cat - $^ ; \
echo '};'; \
@@ -36,7 +36,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
$(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@$(kecho) " GEN $@"
- @(set -e; \
+ $(Q)(set -e; \
allf=""; \
for f in $^ ; do \
test -f $$f || continue;\
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 03323121ca50..eb297e1015e0 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -524,6 +524,7 @@ use_default_name:
INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk);
INIT_WORK(&rdev->mgmt_registrations_update_wk,
cfg80211_mgmt_registrations_update_wk);
+ spin_lock_init(&rdev->mgmt_registrations_lock);
#ifdef CONFIG_CFG80211_DEFAULT_PS
rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -1080,6 +1081,16 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list)
cfg80211_put_bss(&rdev->wiphy, &scan->pub);
mutex_destroy(&rdev->wiphy.mtx);
+
+ /*
+ * The 'regd' can only be non-NULL if we never finished
+ * initializing the wiphy and thus never went through the
+ * unregister path - e.g. in failure scenarios. Thus, it
+ * cannot have been visible to anyone if non-NULL, so we
+ * can just free it here.
+ */
+ kfree(rcu_dereference_raw(rdev->wiphy.regd));
+
kfree(rdev);
}
@@ -1279,7 +1290,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
INIT_LIST_HEAD(&wdev->event_list);
spin_lock_init(&wdev->event_lock);
INIT_LIST_HEAD(&wdev->mgmt_registrations);
- spin_lock_init(&wdev->mgmt_registrations_lock);
INIT_LIST_HEAD(&wdev->pmsr_list);
spin_lock_init(&wdev->pmsr_lock);
INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index b35d0db12f1d..1720abf36f92 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -100,6 +100,8 @@ struct cfg80211_registered_device {
struct work_struct propagate_cac_done_wk;
struct work_struct mgmt_registrations_update_wk;
+ /* lock for all wdev lists */
+ spinlock_t mgmt_registrations_lock;
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 3aa69b375a10..783acd2c4211 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -452,9 +452,9 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
lockdep_assert_held(&rdev->wiphy.mtx);
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
if (!wdev->mgmt_registrations_need_update) {
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
return;
}
@@ -479,7 +479,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
rcu_read_unlock();
wdev->mgmt_registrations_need_update = 0;
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
rdev_update_mgmt_frame_registrations(rdev, wdev, &upd);
}
@@ -503,6 +503,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
int match_len, bool multicast_rx,
struct netlink_ext_ack *extack)
{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_mgmt_registration *reg, *nreg;
int err = 0;
u16 mgmt_type;
@@ -548,7 +549,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
if (!nreg)
return -ENOMEM;
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
int mlen = min(match_len, reg->match_len);
@@ -583,7 +584,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
list_add(&nreg->list, &wdev->mgmt_registrations);
}
wdev->mgmt_registrations_need_update = 1;
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
cfg80211_mgmt_registrations_update(wdev);
@@ -591,7 +592,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
out:
kfree(nreg);
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
return err;
}
@@ -602,7 +603,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_mgmt_registration *reg, *tmp;
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
if (reg->nlportid != nlportid)
@@ -615,7 +616,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
schedule_work(&rdev->mgmt_registrations_update_wk);
}
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
if (nlportid && rdev->crit_proto_nlportid == nlportid) {
rdev->crit_proto_nlportid = 0;
@@ -628,15 +629,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_mgmt_registration *reg, *tmp;
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
list_del(&reg->list);
kfree(reg);
}
wdev->mgmt_registrations_need_update = 1;
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
cfg80211_mgmt_registrations_update(wdev);
}
@@ -784,7 +786,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
data = buf + ieee80211_hdrlen(mgmt->frame_control);
data_len = len - ieee80211_hdrlen(mgmt->frame_control);
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
if (reg->frame_type != ftype)
@@ -808,7 +810,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
break;
}
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
trace_cfg80211_return_bool(result);
return result;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index bf7cd4752547..81232b73df8f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -437,6 +437,16 @@ sar_policy[NL80211_SAR_ATTR_MAX + 1] = {
[NL80211_SAR_ATTR_SPECS] = NLA_POLICY_NESTED_ARRAY(sar_specs_policy),
};
+static const struct nla_policy
+nl80211_mbssid_config_policy[NL80211_MBSSID_CONFIG_ATTR_MAX + 1] = {
+ [NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES] = NLA_POLICY_MIN(NLA_U8, 2),
+ [NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY] =
+ NLA_POLICY_MIN(NLA_U8, 1),
+ [NL80211_MBSSID_CONFIG_ATTR_INDEX] = { .type = NLA_U8 },
+ [NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX] = { .type = NLA_U32 },
+ [NL80211_MBSSID_CONFIG_ATTR_EMA] = { .type = NLA_FLAG },
+};
+
static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -763,6 +773,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 },
[NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 },
[NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy),
+ [NL80211_ATTR_MBSSID_CONFIG] =
+ NLA_POLICY_NESTED(nl80211_mbssid_config_policy),
+ [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED },
};
/* policy for the key attributes */
@@ -853,6 +866,7 @@ nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = {
[NL80211_BAND_5GHZ] = { .type = NLA_S32 },
[NL80211_BAND_6GHZ] = { .type = NLA_S32 },
[NL80211_BAND_60GHZ] = { .type = NLA_S32 },
+ [NL80211_BAND_LC] = { .type = NLA_S32 },
};
static const struct nla_policy
@@ -2207,6 +2221,35 @@ fail:
return -ENOBUFS;
}
+static int nl80211_put_mbssid_support(struct wiphy *wiphy, struct sk_buff *msg)
+{
+ struct nlattr *config;
+
+ if (!wiphy->mbssid_max_interfaces)
+ return 0;
+
+ config = nla_nest_start(msg, NL80211_ATTR_MBSSID_CONFIG);
+ if (!config)
+ return -ENOBUFS;
+
+ if (nla_put_u8(msg, NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES,
+ wiphy->mbssid_max_interfaces))
+ goto fail;
+
+ if (wiphy->ema_max_profile_periodicity &&
+ nla_put_u8(msg,
+ NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY,
+ wiphy->ema_max_profile_periodicity))
+ goto fail;
+
+ nla_nest_end(msg, config);
+ return 0;
+
+fail:
+ nla_nest_cancel(msg, config);
+ return -ENOBUFS;
+}
+
struct nl80211_dump_wiphy_state {
s64 filter_wiphy;
long start;
@@ -2792,6 +2835,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
if (nl80211_put_sar_specs(rdev, msg))
goto nla_put_failure;
+ if (nl80211_put_mbssid_support(&rdev->wiphy, msg))
+ goto nla_put_failure;
+
/* done */
state->split_start = 0;
break;
@@ -4981,6 +5027,96 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev,
return 0;
}
+static int nl80211_parse_mbssid_config(struct wiphy *wiphy,
+ struct net_device *dev,
+ struct nlattr *attrs,
+ struct cfg80211_mbssid_config *config,
+ u8 num_elems)
+{
+ struct nlattr *tb[NL80211_MBSSID_CONFIG_ATTR_MAX + 1];
+
+ if (!wiphy->mbssid_max_interfaces)
+ return -EOPNOTSUPP;
+
+ if (nla_parse_nested(tb, NL80211_MBSSID_CONFIG_ATTR_MAX, attrs, NULL,
+ NULL) ||
+ !tb[NL80211_MBSSID_CONFIG_ATTR_INDEX])
+ return -EINVAL;
+
+ config->ema = nla_get_flag(tb[NL80211_MBSSID_CONFIG_ATTR_EMA]);
+ if (config->ema) {
+ if (!wiphy->ema_max_profile_periodicity)
+ return -EOPNOTSUPP;
+
+ if (num_elems > wiphy->ema_max_profile_periodicity)
+ return -EINVAL;
+ }
+
+ config->index = nla_get_u8(tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]);
+ if (config->index >= wiphy->mbssid_max_interfaces ||
+ (!config->index && !num_elems))
+ return -EINVAL;
+
+ if (tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]) {
+ u32 tx_ifindex =
+ nla_get_u32(tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]);
+
+ if ((!config->index && tx_ifindex != dev->ifindex) ||
+ (config->index && tx_ifindex == dev->ifindex))
+ return -EINVAL;
+
+ if (tx_ifindex != dev->ifindex) {
+ struct net_device *tx_netdev =
+ dev_get_by_index(wiphy_net(wiphy), tx_ifindex);
+
+ if (!tx_netdev || !tx_netdev->ieee80211_ptr ||
+ tx_netdev->ieee80211_ptr->wiphy != wiphy ||
+ tx_netdev->ieee80211_ptr->iftype !=
+ NL80211_IFTYPE_AP) {
+ dev_put(tx_netdev);
+ return -EINVAL;
+ }
+
+ config->tx_wdev = tx_netdev->ieee80211_ptr;
+ } else {
+ config->tx_wdev = dev->ieee80211_ptr;
+ }
+ } else if (!config->index) {
+ config->tx_wdev = dev->ieee80211_ptr;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct cfg80211_mbssid_elems *
+nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
+{
+ struct nlattr *nl_elems;
+ struct cfg80211_mbssid_elems *elems;
+ int rem_elems;
+ u8 i = 0, num_elems = 0;
+
+ if (!wiphy->mbssid_max_interfaces)
+ return ERR_PTR(-EINVAL);
+
+ nla_for_each_nested(nl_elems, attrs, rem_elems)
+ num_elems++;
+
+ elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
+ if (!elems)
+ return ERR_PTR(-ENOMEM);
+
+ nla_for_each_nested(nl_elems, attrs, rem_elems) {
+ elems->elem[i].data = nla_data(nl_elems);
+ elems->elem[i].len = nla_len(nl_elems);
+ i++;
+ }
+ elems->cnt = num_elems;
+ return elems;
+}
+
static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
struct nlattr *attrs[],
struct cfg80211_beacon_data *bcn)
@@ -5061,6 +5197,17 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
bcn->ftm_responder = -1;
}
+ if (attrs[NL80211_ATTR_MBSSID_ELEMS]) {
+ struct cfg80211_mbssid_elems *mbssid =
+ nl80211_parse_mbssid_elems(&rdev->wiphy,
+ attrs[NL80211_ATTR_MBSSID_ELEMS]);
+
+ if (IS_ERR(mbssid))
+ return PTR_ERR(mbssid);
+
+ bcn->mbssid_ies = mbssid;
+ }
+
return 0;
}
@@ -5192,21 +5339,21 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev,
}
static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
- const u8 *rates)
+ const struct element *rates)
{
int i;
if (!rates)
return;
- for (i = 0; i < rates[1]; i++) {
- if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY)
+ for (i = 0; i < rates->datalen; i++) {
+ if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY)
params->ht_required = true;
- if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY)
+ if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY)
params->vht_required = true;
- if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY)
+ if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY)
params->he_required = true;
- if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E)
+ if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E)
params->sae_h2e_required = true;
}
}
@@ -5221,27 +5368,27 @@ static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
const struct cfg80211_beacon_data *bcn = &params->beacon;
size_t ies_len = bcn->tail_len;
const u8 *ies = bcn->tail;
- const u8 *rates;
- const u8 *cap;
+ const struct element *rates;
+ const struct element *cap;
- rates = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies, ies_len);
+ rates = cfg80211_find_elem(WLAN_EID_SUPP_RATES, ies, ies_len);
nl80211_check_ap_rate_selectors(params, rates);
- rates = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, ies, ies_len);
+ rates = cfg80211_find_elem(WLAN_EID_EXT_SUPP_RATES, ies, ies_len);
nl80211_check_ap_rate_selectors(params, rates);
- cap = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies, ies_len);
- if (cap && cap[1] >= sizeof(*params->ht_cap))
- params->ht_cap = (void *)(cap + 2);
- cap = cfg80211_find_ie(WLAN_EID_VHT_CAPABILITY, ies, ies_len);
- if (cap && cap[1] >= sizeof(*params->vht_cap))
- params->vht_cap = (void *)(cap + 2);
- cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len);
- if (cap && cap[1] >= sizeof(*params->he_cap) + 1)
- params->he_cap = (void *)(cap + 3);
- cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ies, ies_len);
- if (cap && cap[1] >= sizeof(*params->he_oper) + 1)
- params->he_oper = (void *)(cap + 3);
+ cap = cfg80211_find_elem(WLAN_EID_HT_CAPABILITY, ies, ies_len);
+ if (cap && cap->datalen >= sizeof(*params->ht_cap))
+ params->ht_cap = (void *)cap->data;
+ cap = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY, ies, ies_len);
+ if (cap && cap->datalen >= sizeof(*params->vht_cap))
+ params->vht_cap = (void *)cap->data;
+ cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len);
+ if (cap && cap->datalen >= sizeof(*params->he_cap) + 1)
+ params->he_cap = (void *)(cap->data + 1);
+ cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ies, ies_len);
+ if (cap && cap->datalen >= sizeof(*params->he_oper) + 1)
+ params->he_oper = (void *)(cap->data + 1);
}
static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev,
@@ -5323,7 +5470,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_ap_settings params;
+ struct cfg80211_ap_settings *params;
int err;
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
@@ -5336,27 +5483,29 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (wdev->beacon_interval)
return -EALREADY;
- memset(&params, 0, sizeof(params));
-
/* these are required for START_AP */
if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] ||
!info->attrs[NL80211_ATTR_DTIM_PERIOD] ||
!info->attrs[NL80211_ATTR_BEACON_HEAD])
return -EINVAL;
- err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon);
+ params = kzalloc(sizeof(*params), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ err = nl80211_parse_beacon(rdev, info->attrs, &params->beacon);
if (err)
- return err;
+ goto out;
- params.beacon_interval =
+ params->beacon_interval =
nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- params.dtim_period =
+ params->dtim_period =
nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype,
- params.beacon_interval);
+ params->beacon_interval);
if (err)
- return err;
+ goto out;
/*
* In theory, some of these attributes should be required here
@@ -5366,129 +5515,157 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
* additional information -- drivers must check!
*/
if (info->attrs[NL80211_ATTR_SSID]) {
- params.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
- params.ssid_len =
+ params->ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
+ params->ssid_len =
nla_len(info->attrs[NL80211_ATTR_SSID]);
- if (params.ssid_len == 0)
- return -EINVAL;
+ if (params->ssid_len == 0) {
+ err = -EINVAL;
+ goto out;
+ }
}
if (info->attrs[NL80211_ATTR_HIDDEN_SSID])
- params.hidden_ssid = nla_get_u32(
+ params->hidden_ssid = nla_get_u32(
info->attrs[NL80211_ATTR_HIDDEN_SSID]);
- params.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
+ params->privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
if (info->attrs[NL80211_ATTR_AUTH_TYPE]) {
- params.auth_type = nla_get_u32(
+ params->auth_type = nla_get_u32(
info->attrs[NL80211_ATTR_AUTH_TYPE]);
- if (!nl80211_valid_auth_type(rdev, params.auth_type,
- NL80211_CMD_START_AP))
- return -EINVAL;
+ if (!nl80211_valid_auth_type(rdev, params->auth_type,
+ NL80211_CMD_START_AP)) {
+ err = -EINVAL;
+ goto out;
+ }
} else
- params.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
+ params->auth_type = NL80211_AUTHTYPE_AUTOMATIC;
- err = nl80211_crypto_settings(rdev, info, &params.crypto,
+ err = nl80211_crypto_settings(rdev, info, &params->crypto,
NL80211_MAX_NR_CIPHER_SUITES);
if (err)
- return err;
+ goto out;
if (info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]) {
- if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER))
- return -EOPNOTSUPP;
- params.inactivity_timeout = nla_get_u16(
+ if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER)) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ params->inactivity_timeout = nla_get_u16(
info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]);
}
if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) {
- if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
- return -EINVAL;
- params.p2p_ctwindow =
+ if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
+ err = -EINVAL;
+ goto out;
+ }
+ params->p2p_ctwindow =
nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
- if (params.p2p_ctwindow != 0 &&
- !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN))
- return -EINVAL;
+ if (params->p2p_ctwindow != 0 &&
+ !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) {
+ err = -EINVAL;
+ goto out;
+ }
}
if (info->attrs[NL80211_ATTR_P2P_OPPPS]) {
u8 tmp;
- if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
- return -EINVAL;
+ if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
+ err = -EINVAL;
+ goto out;
+ }
tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]);
- params.p2p_opp_ps = tmp;
- if (params.p2p_opp_ps != 0 &&
- !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS))
- return -EINVAL;
+ params->p2p_opp_ps = tmp;
+ if (params->p2p_opp_ps != 0 &&
+ !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) {
+ err = -EINVAL;
+ goto out;
+ }
}
if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
- err = nl80211_parse_chandef(rdev, info, &params.chandef);
+ err = nl80211_parse_chandef(rdev, info, &params->chandef);
if (err)
- return err;
+ goto out;
} else if (wdev->preset_chandef.chan) {
- params.chandef = wdev->preset_chandef;
- } else if (!nl80211_get_ap_channel(rdev, &params))
- return -EINVAL;
+ params->chandef = wdev->preset_chandef;
+ } else if (!nl80211_get_ap_channel(rdev, params)) {
+ err = -EINVAL;
+ goto out;
+ }
- if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params.chandef,
- wdev->iftype))
- return -EINVAL;
+ if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params->chandef,
+ wdev->iftype)) {
+ err = -EINVAL;
+ goto out;
+ }
if (info->attrs[NL80211_ATTR_TX_RATES]) {
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES,
- &params.beacon_rate,
+ &params->beacon_rate,
dev, false);
if (err)
- return err;
+ goto out;
- err = validate_beacon_tx_rate(rdev, params.chandef.chan->band,
- &params.beacon_rate);
+ err = validate_beacon_tx_rate(rdev, params->chandef.chan->band,
+ &params->beacon_rate);
if (err)
- return err;
+ goto out;
}
if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
- params.smps_mode =
+ params->smps_mode =
nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
- switch (params.smps_mode) {
+ switch (params->smps_mode) {
case NL80211_SMPS_OFF:
break;
case NL80211_SMPS_STATIC:
if (!(rdev->wiphy.features &
- NL80211_FEATURE_STATIC_SMPS))
- return -EINVAL;
+ NL80211_FEATURE_STATIC_SMPS)) {
+ err = -EINVAL;
+ goto out;
+ }
break;
case NL80211_SMPS_DYNAMIC:
if (!(rdev->wiphy.features &
- NL80211_FEATURE_DYNAMIC_SMPS))
- return -EINVAL;
+ NL80211_FEATURE_DYNAMIC_SMPS)) {
+ err = -EINVAL;
+ goto out;
+ }
break;
default:
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
} else {
- params.smps_mode = NL80211_SMPS_OFF;
+ params->smps_mode = NL80211_SMPS_OFF;
}
- params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
- if (params.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ])
- return -EOPNOTSUPP;
+ params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
+ if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
- params.acl = parse_acl_data(&rdev->wiphy, info);
- if (IS_ERR(params.acl))
- return PTR_ERR(params.acl);
+ params->acl = parse_acl_data(&rdev->wiphy, info);
+ if (IS_ERR(params->acl)) {
+ err = PTR_ERR(params->acl);
+ params->acl = NULL;
+ goto out;
+ }
}
- params.twt_responder =
+ params->twt_responder =
nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]);
if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) {
err = nl80211_parse_he_obss_pd(
info->attrs[NL80211_ATTR_HE_OBSS_PD],
- &params.he_obss_pd);
+ &params->he_obss_pd);
if (err)
goto out;
}
@@ -5496,7 +5673,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) {
err = nl80211_parse_he_bss_color(
info->attrs[NL80211_ATTR_HE_BSS_COLOR],
- &params.he_bss_color);
+ &params->he_bss_color);
if (err)
goto out;
}
@@ -5504,7 +5681,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) {
err = nl80211_parse_fils_discovery(rdev,
info->attrs[NL80211_ATTR_FILS_DISCOVERY],
- &params);
+ params);
if (err)
goto out;
}
@@ -5512,24 +5689,35 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) {
err = nl80211_parse_unsol_bcast_probe_resp(
rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP],
- &params);
+ params);
+ if (err)
+ goto out;
+ }
+
+ if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) {
+ err = nl80211_parse_mbssid_config(&rdev->wiphy, dev,
+ info->attrs[NL80211_ATTR_MBSSID_CONFIG],
+ &params->mbssid_config,
+ params->beacon.mbssid_ies ?
+ params->beacon.mbssid_ies->cnt :
+ 0);
if (err)
goto out;
}
- nl80211_calculate_ap_params(&params);
+ nl80211_calculate_ap_params(params);
if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
- params.flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT;
+ params->flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT;
wdev_lock(wdev);
- err = rdev_start_ap(rdev, dev, &params);
+ err = rdev_start_ap(rdev, dev, params);
if (!err) {
- wdev->preset_chandef = params.chandef;
- wdev->beacon_interval = params.beacon_interval;
- wdev->chandef = params.chandef;
- wdev->ssid_len = params.ssid_len;
- memcpy(wdev->ssid, params.ssid, wdev->ssid_len);
+ wdev->preset_chandef = params->chandef;
+ wdev->beacon_interval = params->beacon_interval;
+ wdev->chandef = params->chandef;
+ wdev->ssid_len = params->ssid_len;
+ memcpy(wdev->ssid, params->ssid, wdev->ssid_len);
if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
wdev->conn_owner_nlportid = info->snd_portid;
@@ -5537,7 +5725,13 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
wdev_unlock(wdev);
out:
- kfree(params.acl);
+ kfree(params->acl);
+ kfree(params->beacon.mbssid_ies);
+ if (params->mbssid_config.tx_wdev &&
+ params->mbssid_config.tx_wdev->netdev &&
+ params->mbssid_config.tx_wdev->netdev != dev)
+ dev_put(params->mbssid_config.tx_wdev->netdev);
+ kfree(params);
return err;
}
@@ -5562,12 +5756,14 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_beacon(rdev, info->attrs, &params);
if (err)
- return err;
+ goto out;
wdev_lock(wdev);
err = rdev_change_beacon(rdev, dev, &params);
wdev_unlock(wdev);
+out:
+ kfree(params.mbssid_ies);
return err;
}
@@ -9244,12 +9440,14 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_after);
if (err)
- return err;
+ goto free;
csa_attrs = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*csa_attrs),
GFP_KERNEL);
- if (!csa_attrs)
- return -ENOMEM;
+ if (!csa_attrs) {
+ err = -ENOMEM;
+ goto free;
+ }
err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX,
info->attrs[NL80211_ATTR_CSA_IES],
@@ -9367,6 +9565,8 @@ skip_beacons:
wdev_unlock(wdev);
free:
+ kfree(params.beacon_after.mbssid_ies);
+ kfree(params.beacon_csa.mbssid_ies);
kfree(csa_attrs);
return err;
}
@@ -11767,8 +11967,9 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
if (n_thresholds) {
struct cfg80211_cqm_config *cqm_config;
- cqm_config = kzalloc(sizeof(struct cfg80211_cqm_config) +
- n_thresholds * sizeof(s32), GFP_KERNEL);
+ cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
+ n_thresholds),
+ GFP_KERNEL);
if (!cqm_config) {
err = -ENOMEM;
goto unlock;
@@ -11777,7 +11978,8 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
cqm_config->rssi_hyst = hysteresis;
cqm_config->n_rssi_thresholds = n_thresholds;
memcpy(cqm_config->rssi_thresholds, thresholds,
- n_thresholds * sizeof(s32));
+ flex_array_size(cqm_config, rssi_thresholds,
+ n_thresholds));
wdev->cqm_config = cqm_config;
}
@@ -14900,10 +15102,35 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info)
wdev_unlock(wdev);
out:
+ kfree(params.beacon_next.mbssid_ies);
+ kfree(params.beacon_color_change.mbssid_ies);
kfree(tb);
return err;
}
+static int nl80211_set_fils_aad(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct cfg80211_fils_aad fils_aad = {};
+ u8 *nonces;
+
+ if (!info->attrs[NL80211_ATTR_MAC] ||
+ !info->attrs[NL80211_ATTR_FILS_KEK] ||
+ !info->attrs[NL80211_ATTR_FILS_NONCES])
+ return -EINVAL;
+
+ fils_aad.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ fils_aad.kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]);
+ fils_aad.kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]);
+ nonces = nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]);
+ fils_aad.snonce = nonces;
+ fils_aad.anonce = nonces + FILS_NONCE_LEN;
+
+ return rdev_set_fils_aad(rdev, dev, &fils_aad);
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -15081,9 +15308,7 @@ static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info)
if (specs > rdev->wiphy.sar_capa->num_freq_ranges)
return -EINVAL;
- sar_spec = kzalloc(sizeof(*sar_spec) +
- specs * sizeof(struct cfg80211_sar_sub_specs),
- GFP_KERNEL);
+ sar_spec = kzalloc(struct_size(sar_spec, sub_specs, specs), GFP_KERNEL);
if (!sar_spec)
return -ENOMEM;
@@ -15907,6 +16132,13 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_SET_FILS_AAD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = nl80211_set_fils_aad,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ },
};
static struct genl_family nl80211_fam __ro_after_init = {
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index ce6bf218a1a3..cc1efec4b27b 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1381,4 +1381,18 @@ static inline int rdev_color_change(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int
+rdev_set_fils_aad(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, struct cfg80211_fils_aad *fils_aad)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_set_fils_aad(&rdev->wiphy, dev, fils_aad);
+ if (rdev->ops->set_fils_aad)
+ ret = rdev->ops->set_fils_aad(&rdev->wiphy, dev, fils_aad);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 11c68b159324..22e92be61938 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -383,7 +383,7 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid,
const u8 *ssid, size_t ssid_len)
{
const struct cfg80211_bss_ies *ies;
- const u8 *ssidie;
+ const struct element *ssid_elem;
if (bssid && !ether_addr_equal(a->bssid, bssid))
return false;
@@ -394,12 +394,12 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid,
ies = rcu_access_pointer(a->ies);
if (!ies)
return false;
- ssidie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len);
- if (!ssidie)
+ ssid_elem = cfg80211_find_elem(WLAN_EID_SSID, ies->data, ies->len);
+ if (!ssid_elem)
return false;
- if (ssidie[1] != ssid_len)
+ if (ssid_elem->datalen != ssid_len)
return false;
- return memcmp(ssidie + 2, ssid, ssid_len) == 0;
+ return memcmp(ssid_elem->data, ssid, ssid_len) == 0;
}
static int
@@ -418,14 +418,17 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss,
}
ssid_len = ssid[1];
ssid = ssid + 2;
- rcu_read_unlock();
/* check if nontrans_bss is in the list */
list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) {
- if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len))
+ if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) {
+ rcu_read_unlock();
return 0;
+ }
}
+ rcu_read_unlock();
+
/* add to the list */
list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list);
return 0;
@@ -1791,25 +1794,13 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
return NULL;
}
-/*
- * Update RX channel information based on the available frame payload
- * information. This is mainly for the 2.4 GHz band where frames can be received
- * from neighboring channels and the Beacon frames use the DSSS Parameter Set
- * element to indicate the current (transmitting) channel, but this might also
- * be needed on other bands if RX frequency does not match with the actual
- * operating channel of a BSS.
- */
-static struct ieee80211_channel *
-cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
- struct ieee80211_channel *channel,
- enum nl80211_bss_scan_width scan_width)
+int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
+ enum nl80211_band band)
{
const u8 *tmp;
- u32 freq;
int channel_number = -1;
- struct ieee80211_channel *alt_channel;
- if (channel->band == NL80211_BAND_S1GHZ) {
+ if (band == NL80211_BAND_S1GHZ) {
tmp = cfg80211_find_ie(WLAN_EID_S1G_OPERATION, ie, ielen);
if (tmp && tmp[1] >= sizeof(struct ieee80211_s1g_oper_ie)) {
struct ieee80211_s1g_oper_ie *s1gop = (void *)(tmp + 2);
@@ -1830,6 +1821,29 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
}
}
+ return channel_number;
+}
+EXPORT_SYMBOL(cfg80211_get_ies_channel_number);
+
+/*
+ * Update RX channel information based on the available frame payload
+ * information. This is mainly for the 2.4 GHz band where frames can be received
+ * from neighboring channels and the Beacon frames use the DSSS Parameter Set
+ * element to indicate the current (transmitting) channel, but this might also
+ * be needed on other bands if RX frequency does not match with the actual
+ * operating channel of a BSS.
+ */
+static struct ieee80211_channel *
+cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
+ struct ieee80211_channel *channel,
+ enum nl80211_bss_scan_width scan_width)
+{
+ u32 freq;
+ int channel_number;
+ struct ieee80211_channel *alt_channel;
+
+ channel_number = cfg80211_get_ies_channel_number(ie, ielen, channel->band);
+
if (channel_number < 0) {
/* No channel information in frame payload */
return channel;
@@ -2072,12 +2086,12 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
if (!non_tx_data)
return;
- if (!cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
+ if (!cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
return;
if (!wiphy->support_mbssid)
return;
if (wiphy->support_only_he_mbssid &&
- !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
+ !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
return;
new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp);
@@ -2444,10 +2458,10 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
len, gfp);
if (!res || !wiphy->support_mbssid ||
- !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
+ !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
return res;
if (wiphy->support_only_he_mbssid &&
- !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
+ !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
return res;
non_tx_data.tx_bss = res;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 19b78d472283..ad6c16a06bcb 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -167,6 +167,19 @@
__entry->center_freq1, __entry->freq1_offset, \
__entry->center_freq2
+#define FILS_AAD_ASSIGN(fa) \
+ do { \
+ if (fa) { \
+ ether_addr_copy(__entry->macaddr, fa->macaddr); \
+ __entry->kek_len = fa->kek_len; \
+ } else { \
+ eth_zero_addr(__entry->macaddr); \
+ __entry->kek_len = 0; \
+ } \
+ } while (0)
+#define FILS_AAD_PR_FMT \
+ "macaddr: %pM, kek_len: %d"
+
#define SINFO_ENTRY __field(int, generation) \
__field(u32, connected_time) \
__field(u32, inactive_time) \
@@ -2614,6 +2627,24 @@ DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_abort_pmsr,
TP_ARGS(wiphy, wdev, cookie)
);
+TRACE_EVENT(rdev_set_fils_aad,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_fils_aad *fils_aad),
+ TP_ARGS(wiphy, netdev, fils_aad),
+ TP_STRUCT__entry(WIPHY_ENTRY
+ NETDEV_ENTRY
+ __array(u8, macaddr, ETH_ALEN)
+ __field(u8, kek_len)
+ ),
+ TP_fast_assign(WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ FILS_AAD_ASSIGN(fils_aad);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " FILS_AAD_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr,
+ __entry->kek_len)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 18dba3d7c638..5ff1f8726faf 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -80,6 +80,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band)
return 0; /* not supported */
switch (band) {
case NL80211_BAND_2GHZ:
+ case NL80211_BAND_LC:
if (chan == 14)
return MHZ_TO_KHZ(2484);
else if (chan < 14)
@@ -209,6 +210,7 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
WARN_ON(want);
break;
case NL80211_BAND_2GHZ:
+ case NL80211_BAND_LC:
want = 7;
for (i = 0; i < sband->n_bitrates; i++) {
switch (sband->bitrates[i].bitrate) {
@@ -1028,14 +1030,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
!(rdev->wiphy.interface_modes & (1 << ntype)))
return -EOPNOTSUPP;
- /* if it's part of a bridge, reject changing type to station/ibss */
- if (netif_is_bridge_port(dev) &&
- (ntype == NL80211_IFTYPE_ADHOC ||
- ntype == NL80211_IFTYPE_STATION ||
- ntype == NL80211_IFTYPE_P2P_CLIENT))
- return -EBUSY;
-
if (ntype != otype) {
+ /* if it's part of a bridge, reject changing type to station/ibss */
+ if (netif_is_bridge_port(dev) &&
+ (ntype == NL80211_IFTYPE_ADHOC ||
+ ntype == NL80211_IFTYPE_STATION ||
+ ntype == NL80211_IFTYPE_P2P_CLIENT))
+ return -EBUSY;
+
dev->ieee80211_ptr->use_4addr = false;
dev->ieee80211_ptr->mesh_id_up_len = 0;
wdev_lock(dev->ieee80211_ptr);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d6b500dc4208..f16074eb53c7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -134,21 +134,6 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
return 0;
}
-void xp_release(struct xdp_buff_xsk *xskb)
-{
- xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
-}
-
-static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
-{
- u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
-
- offset += xskb->pool->headroom;
- if (!xskb->pool->unaligned)
- return xskb->orig_addr + offset;
- return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
-}
-
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 8de01aaac4a0..90c4e1e819d3 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -44,12 +44,13 @@ void xp_destroy(struct xsk_buff_pool *pool)
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
struct xdp_umem *umem)
{
+ bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
struct xsk_buff_pool *pool;
struct xdp_buff_xsk *xskb;
- u32 i;
+ u32 i, entries;
- pool = kvzalloc(struct_size(pool, free_heads, umem->chunks),
- GFP_KERNEL);
+ entries = unaligned ? umem->chunks : 0;
+ pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
if (!pool)
goto out;
@@ -63,7 +64,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
pool->free_heads_cnt = umem->chunks;
pool->headroom = umem->headroom;
pool->chunk_size = umem->chunk_size;
- pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+ pool->chunk_shift = ffs(umem->chunk_size) - 1;
+ pool->unaligned = unaligned;
pool->frame_len = umem->chunk_size - umem->headroom -
XDP_PACKET_HEADROOM;
pool->umem = umem;
@@ -81,7 +83,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb = &pool->heads[i];
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
- pool->free_heads[i] = xskb;
+ if (pool->unaligned)
+ pool->free_heads[i] = xskb;
+ else
+ xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
}
return pool;
@@ -406,6 +411,12 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
if (pool->unaligned)
xp_check_dma_contiguity(dma_map);
+ else
+ for (i = 0; i < pool->heads_cnt; i++) {
+ struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+ xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
+ }
err = xp_init_dma_info(pool, dma_map);
if (err) {
@@ -448,12 +459,9 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
if (pool->free_heads_cnt == 0)
return NULL;
- xskb = pool->free_heads[--pool->free_heads_cnt];
-
for (;;) {
if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
pool->fq->queue_empty_descs++;
- xp_release(xskb);
return NULL;
}
@@ -466,17 +474,17 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
}
break;
}
- xskq_cons_release(pool->fq);
- xskb->orig_addr = addr;
- xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
- if (pool->dma_pages_cnt) {
- xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] &
- ~XSK_NEXT_PG_CONTIG_MASK) +
- (addr & ~PAGE_MASK);
- xskb->dma = xskb->frame_dma + pool->headroom +
- XDP_PACKET_HEADROOM;
+ if (pool->unaligned) {
+ xskb = pool->free_heads[--pool->free_heads_cnt];
+ xp_init_xskb_addr(xskb, pool, addr);
+ if (pool->dma_pages_cnt)
+ xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+ } else {
+ xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
}
+
+ xskq_cons_release(pool->fq);
return xskb;
}
@@ -507,6 +515,96 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
}
EXPORT_SYMBOL(xp_alloc);
+static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ u32 i, cached_cons, nb_entries;
+
+ if (max > pool->free_heads_cnt)
+ max = pool->free_heads_cnt;
+ max = xskq_cons_nb_entries(pool->fq, max);
+
+ cached_cons = pool->fq->cached_cons;
+ nb_entries = max;
+ i = max;
+ while (i--) {
+ struct xdp_buff_xsk *xskb;
+ u64 addr;
+ bool ok;
+
+ __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr);
+
+ ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
+ xp_check_aligned(pool, &addr);
+ if (unlikely(!ok)) {
+ pool->fq->invalid_descs++;
+ nb_entries--;
+ continue;
+ }
+
+ if (pool->unaligned) {
+ xskb = pool->free_heads[--pool->free_heads_cnt];
+ xp_init_xskb_addr(xskb, pool, addr);
+ if (pool->dma_pages_cnt)
+ xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+ } else {
+ xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
+ }
+
+ *xdp = &xskb->xdp;
+ xdp++;
+ }
+
+ xskq_cons_release_n(pool->fq, max);
+ return nb_entries;
+}
+
+static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries)
+{
+ struct xdp_buff_xsk *xskb;
+ u32 i;
+
+ nb_entries = min_t(u32, nb_entries, pool->free_list_cnt);
+
+ i = nb_entries;
+ while (i--) {
+ xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
+ list_del(&xskb->free_list_node);
+
+ *xdp = &xskb->xdp;
+ xdp++;
+ }
+ pool->free_list_cnt -= nb_entries;
+
+ return nb_entries;
+}
+
+u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ u32 nb_entries1 = 0, nb_entries2;
+
+ if (unlikely(pool->dma_need_sync)) {
+ /* Slow path */
+ *xdp = xp_alloc(pool);
+ return !!*xdp;
+ }
+
+ if (unlikely(pool->free_list_cnt)) {
+ nb_entries1 = xp_alloc_reused(pool, xdp, max);
+ if (nb_entries1 == max)
+ return nb_entries1;
+
+ max -= nb_entries1;
+ xdp += nb_entries1;
+ }
+
+ nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max);
+ if (!nb_entries2)
+ pool->fq->queue_empty_descs++;
+
+ return nb_entries1 + nb_entries2;
+}
+EXPORT_SYMBOL(xp_alloc_batch);
+
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
if (pool->free_list_cnt >= count)
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 9ae13cccfb28..e9aa2c236356 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -111,14 +111,18 @@ struct xsk_queue {
/* Functions that read and validate content from consumer rings. */
-static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
+static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+ u32 idx = cached_cons & q->ring_mask;
- if (q->cached_cons != q->cached_prod) {
- u32 idx = q->cached_cons & q->ring_mask;
+ *addr = ring->desc[idx];
+}
- *addr = ring->desc[idx];
+static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
+{
+ if (q->cached_cons != q->cached_prod) {
+ __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr);
return true;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 3df0861d4390..70a8c36f0ba6 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -530,7 +530,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
- if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+ if (xfrm_parse_spi(skb, nexthdr, &spi, &seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
goto drop;
}
@@ -560,7 +560,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
}
seq = 0;
- if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+ if (!spi && xfrm_parse_spi(skb, nexthdr, &spi, &seq)) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
goto drop;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 37d17a79617c..1a06585022ab 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2486,9 +2486,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
if (likely(xdst)) {
- struct dst_entry *dst = &xdst->u.dst;
-
- memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
+ memset_after(xdst, 0, u.dst);
} else
xdst = ERR_PTR(-ENOBUFS);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 03b66d154b2b..7c36cc1f3d79 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1961,24 +1961,65 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
return skb;
}
+static int xfrm_notify_userpolicy(struct net *net)
+{
+ struct xfrm_userpolicy_default *up;
+ int len = NLMSG_ALIGN(sizeof(*up));
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ int err;
+
+ skb = nlmsg_new(len, GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_GETDEFAULT, sizeof(*up), 0);
+ if (nlh == NULL) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ up = nlmsg_data(nlh);
+ up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+
+ nlmsg_end(skb, nlh);
+
+ rcu_read_lock();
+ err = xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
+ rcu_read_unlock();
+
+ return err;
+}
+
static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr **attrs)
{
struct net *net = sock_net(skb->sk);
struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
- u8 dirmask;
- u8 old_default = net->xfrm.policy_default;
- if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX)
- return -EINVAL;
+ if (up->in == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_IN;
+ else if (up->in == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_IN;
- dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK;
+ if (up->fwd == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_FWD;
+ else if (up->fwd == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_FWD;
- net->xfrm.policy_default = (old_default & (0xff ^ dirmask))
- | (up->action << up->dirmask);
+ if (up->out == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_OUT;
+ else if (up->out == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_OUT;
rt_genid_bump_all(net);
+ xfrm_notify_userpolicy(net);
return 0;
}
@@ -1988,13 +2029,11 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
struct sk_buff *r_skb;
struct nlmsghdr *r_nlh;
struct net *net = sock_net(skb->sk);
- struct xfrm_userpolicy_default *r_up, *up;
+ struct xfrm_userpolicy_default *r_up;
int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default));
u32 portid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
- up = nlmsg_data(nlh);
-
r_skb = nlmsg_new(len, GFP_ATOMIC);
if (!r_skb)
return -ENOMEM;
@@ -2007,8 +2046,12 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
r_up = nlmsg_data(r_nlh);
- r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask);
- r_up->dirmask = up->dirmask;
+ r_up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ r_up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ r_up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
nlmsg_end(r_skb, r_nlh);
return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
@@ -2912,7 +2955,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
copy_to_user_state(x, &ue->state);
ue->hard = (c->data.hard != 0) ? 1 : 0;
/* clear the padding bytes */
- memset(&ue->hard + 1, 0, sizeof(*ue) - offsetofend(typeof(*ue), hard));
+ memset_after(ue, 0, hard);
err = xfrm_mark_put(skb, &x->mark);
if (err)