diff options
Diffstat (limited to 'net')
107 files changed, 1059 insertions, 744 deletions
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8500f56cbd10..c350ab63cd54 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -25,6 +25,7 @@ #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> +#include <linux/preempt.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> @@ -83,11 +84,12 @@ static inline u32 batadv_choose_claim(const void *data, u32 size) */ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) { - const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; + const struct batadv_bla_backbone_gw *gw; u32 hash = 0; - hash = jhash(&claim->addr, sizeof(claim->addr), hash); - hash = jhash(&claim->vid, sizeof(claim->vid), hash); + gw = (struct batadv_bla_backbone_gw *)data; + hash = jhash(&gw->orig, sizeof(gw->orig), hash); + hash = jhash(&gw->vid, sizeof(gw->vid), hash); return hash % size; } @@ -1579,13 +1581,16 @@ int batadv_bla_init(struct batadv_priv *bat_priv) } /** - * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. + * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup. * @bat_priv: the bat priv with all the soft interface information - * @skb: contains the bcast_packet to be checked + * @skb: contains the multicast packet to be checked + * @payload_ptr: pointer to position inside the head buffer of the skb + * marking the start of the data to be CRC'ed + * @orig: originator mac address, NULL if unknown * - * check if it is on our broadcast list. Another gateway might - * have sent the same packet because it is connected to the same backbone, - * so we have to remove this duplicate. + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. * * This is performed by checking the CRC, which will tell us * with a good chance that it is the same packet. If it is furthermore @@ -1594,19 +1599,17 @@ int batadv_bla_init(struct batadv_priv *bat_priv) * * Return: true if a packet is in the duplicate list, false otherwise. */ -bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, - struct sk_buff *skb) +static bool batadv_bla_check_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb, u8 *payload_ptr, + const u8 *orig) { - int i, curr; - __be32 crc; - struct batadv_bcast_packet *bcast_packet; struct batadv_bcast_duplist_entry *entry; bool ret = false; - - bcast_packet = (struct batadv_bcast_packet *)skb->data; + int i, curr; + __be32 crc; /* calculate the crc ... */ - crc = batadv_skb_crc32(skb, (u8 *)(bcast_packet + 1)); + crc = batadv_skb_crc32(skb, payload_ptr); spin_lock_bh(&bat_priv->bla.bcast_duplist_lock); @@ -1625,8 +1628,21 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, if (entry->crc != crc) continue; - if (batadv_compare_eth(entry->orig, bcast_packet->orig)) - continue; + /* are the originators both known and not anonymous? */ + if (orig && !is_zero_ether_addr(orig) && + !is_zero_ether_addr(entry->orig)) { + /* If known, check if the new frame came from + * the same originator: + * We are safe to take identical frames from the + * same orig, if known, as multiplications in + * the mesh are detected via the (orig, seqno) pair. + * So we can be a bit more liberal here and allow + * identical frames from the same orig which the source + * host might have sent multiple times on purpose. + */ + if (batadv_compare_eth(entry->orig, orig)) + continue; + } /* this entry seems to match: same crc, not too old, * and from another gw. therefore return true to forbid it. @@ -1642,7 +1658,14 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, entry = &bat_priv->bla.bcast_duplist[curr]; entry->crc = crc; entry->entrytime = jiffies; - ether_addr_copy(entry->orig, bcast_packet->orig); + + /* known originator */ + if (orig) + ether_addr_copy(entry->orig, orig); + /* anonymous originator */ + else + eth_zero_addr(entry->orig); + bat_priv->bla.bcast_duplist_curr = curr; out: @@ -1652,6 +1675,48 @@ out: } /** + * batadv_bla_check_ucast_duplist() - Check if a frame is in the broadcast dup. + * @bat_priv: the bat priv with all the soft interface information + * @skb: contains the multicast packet to be checked, decapsulated from a + * unicast_packet + * + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. + * + * Return: true if a packet is in the duplicate list, false otherwise. + */ +static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_bla_check_duplist(bat_priv, skb, (u8 *)skb->data, NULL); +} + +/** + * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. + * @bat_priv: the bat priv with all the soft interface information + * @skb: contains the bcast_packet to be checked + * + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. + * + * Return: true if a packet is in the duplicate list, false otherwise. + */ +bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct batadv_bcast_packet *bcast_packet; + u8 *payload_ptr; + + bcast_packet = (struct batadv_bcast_packet *)skb->data; + payload_ptr = (u8 *)(bcast_packet + 1); + + return batadv_bla_check_duplist(bat_priv, skb, payload_ptr, + bcast_packet->orig); +} + +/** * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for * the VLAN identified by vid. * @bat_priv: the bat priv with all the soft interface information @@ -1812,7 +1877,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame - * @is_bcast: the packet came in a broadcast packet type. + * @packet_type: the batman packet type this frame came in * * batadv_bla_rx avoidance checks if: * * we have to race for a claim @@ -1824,7 +1889,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, * further process the skb. */ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid, bool is_bcast) + unsigned short vid, int packet_type) { struct batadv_bla_backbone_gw *backbone_gw; struct ethhdr *ethhdr; @@ -1846,9 +1911,32 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, goto handled; if (unlikely(atomic_read(&bat_priv->bla.num_requests))) - /* don't allow broadcasts while requests are in flight */ - if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) - goto handled; + /* don't allow multicast packets while requests are in flight */ + if (is_multicast_ether_addr(ethhdr->h_dest)) + /* Both broadcast flooding or multicast-via-unicasts + * delivery might send to multiple backbone gateways + * sharing the same LAN and therefore need to coordinate + * which backbone gateway forwards into the LAN, + * by claiming the payload source address. + * + * Broadcast flooding and multicast-via-unicasts + * delivery use the following two batman packet types. + * Note: explicitly exclude BATADV_UNICAST_4ADDR, + * as the DHCP gateway feature will send explicitly + * to only one BLA gateway, so the claiming process + * should be avoided there. + */ + if (packet_type == BATADV_BCAST || + packet_type == BATADV_UNICAST) + goto handled; + + /* potential duplicates from foreign BLA backbone gateways via + * multicast-in-unicast packets + */ + if (is_multicast_ether_addr(ethhdr->h_dest) && + packet_type == BATADV_UNICAST && + batadv_bla_check_ucast_duplist(bat_priv, skb)) + goto handled; ether_addr_copy(search_claim.addr, ethhdr->h_source); search_claim.vid = vid; @@ -1883,13 +1971,14 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, goto allow; } - /* if it is a broadcast ... */ - if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) { + /* if it is a multicast ... */ + if (is_multicast_ether_addr(ethhdr->h_dest) && + (packet_type == BATADV_BCAST || packet_type == BATADV_UNICAST)) { /* ... drop it. the responsible gateway is in charge. * - * We need to check is_bcast because with the gateway + * We need to check packet type because with the gateway * feature, broadcasts (like DHCP requests) may be sent - * using a unicast packet type. + * using a unicast 4 address packet type. See comment above. */ goto handled; } else { diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 41edb2c4a327..a81c41b636f9 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -35,7 +35,7 @@ static inline bool batadv_bla_is_loopdetect_mac(const uint8_t *mac) #ifdef CONFIG_BATMAN_ADV_BLA bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid, bool is_bcast); + unsigned short vid, int packet_type); bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); bool batadv_bla_is_backbone_gw(struct sk_buff *skb, @@ -66,7 +66,7 @@ bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, static inline bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, - bool is_bcast) + int packet_type) { return false; } diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index bdc4a1fba1c6..ca24a2e522b7 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -51,6 +51,7 @@ #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> +#include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" @@ -1435,6 +1436,35 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, } /** + * batadv_mcast_forw_send_orig() - send a multicast packet to an originator + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to send + * @vid: the vlan identifier + * @orig_node: the originator to send the packet to + * + * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + */ +int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node) +{ + /* Avoid sending multicast-in-unicast packets to other BLA + * gateways - they already got the frame from the LAN side + * we share with them. + * TODO: Refactor to take BLA into account earlier, to avoid + * reducing the mcast_fanout count. + */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) { + dev_kfree_skb(skb); + return NET_XMIT_SUCCESS; + } + + return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0, + orig_node, vid); +} + +/** * batadv_mcast_forw_tt() - forwards a packet to multicast listeners * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit @@ -1471,8 +1501,8 @@ batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_entry->orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, + orig_entry->orig_node); } rcu_read_unlock(); @@ -1513,8 +1543,7 @@ batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1551,8 +1580,7 @@ batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1618,8 +1646,7 @@ batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1656,8 +1683,7 @@ batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index ebf825991ecd..3e114bc5ca3b 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -46,6 +46,11 @@ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node **mcast_single_orig); +int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node); + int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); @@ -72,6 +77,16 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, } static inline int +batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} + +static inline int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 27cdf5e4349a..9e5c71e406ff 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -826,6 +826,10 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, vid = batadv_get_vid(skb, hdr_len); ethhdr = (struct ethhdr *)(skb->data + hdr_len); + /* do not reroute multicast frames in a unicast header */ + if (is_multicast_ether_addr(ethhdr->h_dest)) + return true; + /* check if the destination client was served by this node and it is now * roaming. In this case, it means that the node has got a ROAM_ADV * message and that it knows the new destination in the mesh to re-route diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 23833a0ba5e6..cdde943c1b83 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -364,9 +364,8 @@ send: goto dropped; ret = batadv_send_skb_via_gw(bat_priv, skb, vid); } else if (mcast_single_orig) { - ret = batadv_send_skb_unicast(bat_priv, skb, - BATADV_UNICAST, 0, - mcast_single_orig, vid); + ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid, + mcast_single_orig); } else if (forw_mode == BATADV_FORW_SOME) { ret = batadv_mcast_forw_send(bat_priv, skb, vid); } else { @@ -425,10 +424,10 @@ void batadv_interface_rx(struct net_device *soft_iface, struct vlan_ethhdr *vhdr; struct ethhdr *ethhdr; unsigned short vid; - bool is_bcast; + int packet_type; batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; - is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); + packet_type = batadv_bcast_packet->packet_type; skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); @@ -471,7 +470,7 @@ void batadv_interface_rx(struct net_device *soft_iface, /* Let the bridge loop avoidance check the packet. If will * not handle it, we can safely push it up. */ - if (batadv_bla_rx(bat_priv, skb, vid, is_bcast)) + if (batadv_bla_rx(bat_priv, skb, vid, packet_type)) goto out; if (orig_node) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index b18cdf03edb3..dfec65eca8a6 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -88,9 +88,10 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p, } } -static int br_chk_addr_ip(struct net_device *dev, void *data) +static int br_chk_addr_ip(struct net_device *dev, + struct netdev_nested_priv *priv) { - __be32 ip = *(__be32 *)data; + __be32 ip = *(__be32 *)priv->data; struct in_device *in_dev; __be32 addr = 0; @@ -107,11 +108,15 @@ static int br_chk_addr_ip(struct net_device *dev, void *data) static bool br_is_local_ip(struct net_device *dev, __be32 ip) { - if (br_chk_addr_ip(dev, &ip)) + struct netdev_nested_priv priv = { + .data = (void *)&ip, + }; + + if (br_chk_addr_ip(dev, &priv)) return true; /* check if ip is configured on upper dev */ - if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip)) + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &priv)) return true; return false; @@ -361,9 +366,10 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, } } -static int br_chk_addr_ip6(struct net_device *dev, void *data) +static int br_chk_addr_ip6(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct in6_addr *addr = (struct in6_addr *)data; + struct in6_addr *addr = (struct in6_addr *)priv->data; if (ipv6_chk_addr(dev_net(dev), addr, dev, 0)) return 1; @@ -374,11 +380,15 @@ static int br_chk_addr_ip6(struct net_device *dev, void *data) static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr) { - if (br_chk_addr_ip6(dev, addr)) + struct netdev_nested_priv priv = { + .data = (void *)addr, + }; + + if (br_chk_addr_ip6(dev, &priv)) return true; /* check if ip is configured on upper dev */ - if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr)) + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, &priv)) return true; return false; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9db504baa094..32ac8343b0ba 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -413,6 +413,8 @@ void br_fdb_delete_by_port(struct net_bridge *br, if (!do_all) if (test_bit(BR_FDB_STATIC, &f->flags) || + (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) && + !test_bit(BR_FDB_OFFLOADED, &f->flags)) || (vid && f->key.vlan_id != vid)) continue; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 147d52596e17..da310f0ca725 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -380,6 +380,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, u32 filter_mask, const struct net_device *dev) { u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + struct nlattr *af = NULL; struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; @@ -423,11 +424,18 @@ static int br_fill_ifinfo(struct sk_buff *skb, nla_nest_end(skb, nest); } + if (filter_mask & (RTEXT_FILTER_BRVLAN | + RTEXT_FILTER_BRVLAN_COMPRESSED | + RTEXT_FILTER_MRP)) { + af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + } + /* Check if the VID information is requested */ if ((filter_mask & RTEXT_FILTER_BRVLAN) || (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { struct net_bridge_vlan_group *vg; - struct nlattr *af; int err; /* RCU needed because of the VLAN locking rules (rcu || rtnl) */ @@ -441,11 +449,6 @@ static int br_fill_ifinfo(struct sk_buff *skb, rcu_read_unlock(); goto done; } - af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); - if (!af) { - rcu_read_unlock(); - goto nla_put_failure; - } if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) err = br_fill_ifvlaninfo_compressed(skb, vg); else @@ -456,32 +459,25 @@ static int br_fill_ifinfo(struct sk_buff *skb, rcu_read_unlock(); if (err) goto nla_put_failure; - - nla_nest_end(skb, af); } if (filter_mask & RTEXT_FILTER_MRP) { - struct nlattr *af; int err; if (!br_mrp_enabled(br) || port) goto done; - af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); - if (!af) - goto nla_put_failure; - rcu_read_lock(); err = br_mrp_fill_info(skb, br); rcu_read_unlock(); if (err) goto nla_put_failure; - - nla_nest_end(skb, af); } done: + if (af) + nla_nest_end(skb, af); nlmsg_end(skb, nlh); return 0; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index f9092c71225f..ee8780080be5 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1288,11 +1288,13 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, } } -static int __br_vlan_get_pvid(const struct net_device *dev, - struct net_bridge_port *p, u16 *p_pvid) +int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) { struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + ASSERT_RTNL(); + p = br_port_get_check_rtnl(dev); if (p) vg = nbp_vlan_group(p); else if (netif_is_bridge_master(dev)) @@ -1303,18 +1305,23 @@ static int __br_vlan_get_pvid(const struct net_device *dev, *p_pvid = br_get_pvid(vg); return 0; } - -int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) -{ - ASSERT_RTNL(); - - return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid); -} EXPORT_SYMBOL_GPL(br_vlan_get_pvid); int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid) { - return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid); + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + + p = br_port_get_check_rcu(dev); + if (p) + vg = nbp_vlan_group_rcu(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group_rcu(netdev_priv(dev)); + else + return -EINVAL; + + *p_pvid = br_get_pvid(vg); + return 0; } EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu); @@ -1353,7 +1360,7 @@ static int br_vlan_is_bind_vlan_dev(const struct net_device *dev) } static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev, - __always_unused void *data) + __always_unused struct netdev_nested_priv *priv) { return br_vlan_is_bind_vlan_dev(dev); } @@ -1376,9 +1383,9 @@ struct br_vlan_bind_walk_data { }; static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, - void *data_in) + struct netdev_nested_priv *priv) { - struct br_vlan_bind_walk_data *data = data_in; + struct br_vlan_bind_walk_data *data = priv->data; int found = 0; if (br_vlan_is_bind_vlan_dev(dev) && @@ -1396,10 +1403,13 @@ br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid) struct br_vlan_bind_walk_data data = { .vid = vid, }; + struct netdev_nested_priv priv = { + .data = (void *)&data, + }; rcu_read_lock(); netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn, - &data); + &priv); rcu_read_unlock(); return data.result; @@ -1480,9 +1490,9 @@ struct br_vlan_link_state_walk_data { }; static int br_vlan_link_state_change_fn(struct net_device *vlan_dev, - void *data_in) + struct netdev_nested_priv *priv) { - struct br_vlan_link_state_walk_data *data = data_in; + struct br_vlan_link_state_walk_data *data = priv->data; if (br_vlan_is_bind_vlan_dev(vlan_dev)) br_vlan_set_vlan_dev_state(data->br, vlan_dev); @@ -1496,10 +1506,13 @@ static void br_vlan_link_state_change(struct net_device *dev, struct br_vlan_link_state_walk_data data = { .br = br }; + struct netdev_nested_priv priv = { + .data = (void *)&data, + }; rcu_read_lock(); netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn, - &data); + &priv); rcu_read_unlock(); } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index bdfd66ba3843..d4d7a0e52491 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -575,7 +575,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ - if (page_count(page) >= 1 && !PageSlab(page)) + if (sendpage_ok(page)) sendpage = sock->ops->sendpage; else sendpage = sock_no_sendpage; diff --git a/net/core/dev.c b/net/core/dev.c index 4086d335978c..4906b44af850 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6812,9 +6812,10 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } -static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) +static int ____netdev_has_upper_dev(struct net_device *upper_dev, + struct netdev_nested_priv *priv) { - struct net_device *dev = data; + struct net_device *dev = (struct net_device *)priv->data; return upper_dev == dev; } @@ -6831,10 +6832,14 @@ static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + ASSERT_RTNL(); return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -6851,8 +6856,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); @@ -6997,8 +7006,8 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, static int __netdev_walk_all_upper_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7010,7 +7019,7 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7046,8 +7055,8 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev, int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7058,7 +7067,7 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7094,10 +7103,15 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); static bool __netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = (void *)upper_dev, + }; + ASSERT_RTNL(); return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } /** @@ -7215,8 +7229,8 @@ static struct net_device *__netdev_next_lower_dev(struct net_device *dev, int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7227,7 +7241,7 @@ int netdev_walk_all_lower_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7262,8 +7276,8 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); static int __netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7275,7 +7289,7 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7364,22 +7378,34 @@ static u8 __netdev_lower_depth(struct net_device *dev) return max_depth; } -static int __netdev_update_upper_level(struct net_device *dev, void *data) +static int __netdev_update_upper_level(struct net_device *dev, + struct netdev_nested_priv *__unused) { dev->upper_level = __netdev_upper_depth(dev) + 1; return 0; } -static int __netdev_update_lower_level(struct net_device *dev, void *data) +static int __netdev_update_lower_level(struct net_device *dev, + struct netdev_nested_priv *priv) { dev->lower_level = __netdev_lower_depth(dev) + 1; + +#ifdef CONFIG_LOCKDEP + if (!priv) + return 0; + + if (priv->flags & NESTED_SYNC_IMM) + dev->nested_level = dev->lower_level - 1; + if (priv->flags & NESTED_SYNC_TODO) + net_unlink_todo(dev); +#endif return 0; } int netdev_walk_all_lower_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7390,7 +7416,7 @@ int netdev_walk_all_lower_dev_rcu(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7650,6 +7676,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, + struct netdev_nested_priv *priv, struct netlink_ext_ack *extack) { struct netdev_notifier_changeupper_info changeupper_info = { @@ -7706,9 +7733,9 @@ static int __netdev_upper_dev_link(struct net_device *dev, __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); - __netdev_update_lower_level(upper_dev, NULL); + __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, - NULL); + priv); return 0; @@ -7733,8 +7760,13 @@ int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + return __netdev_upper_dev_link(dev, upper_dev, false, - NULL, NULL, extack); + NULL, NULL, &priv, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -7757,21 +7789,19 @@ int netdev_master_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + return __netdev_upper_dev_link(dev, upper_dev, true, - upper_priv, upper_info, extack); + upper_priv, upper_info, &priv, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -/** - * netdev_upper_dev_unlink - Removes a link to upper device - * @dev: device - * @upper_dev: new upper device - * - * Removes a link to device which is upper to this one. The caller must hold - * the RTNL lock. - */ -void netdev_upper_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev, + struct netdev_nested_priv *priv) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { @@ -7796,9 +7826,28 @@ void netdev_upper_dev_unlink(struct net_device *dev, __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); - __netdev_update_lower_level(upper_dev, NULL); + __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, - NULL); + priv); +} + +/** + * netdev_upper_dev_unlink - Removes a link to upper device + * @dev: device + * @upper_dev: new upper device + * + * Removes a link to device which is upper to this one. The caller must hold + * the RTNL lock. + */ +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) +{ + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_TODO, + .data = NULL, + }; + + __netdev_upper_dev_unlink(dev, upper_dev, &priv); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -7834,6 +7883,10 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev, struct net_device *dev, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = NULL, + }; int err; if (!new_dev) @@ -7841,8 +7894,8 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev, if (old_dev && new_dev != old_dev) netdev_adjacent_dev_disable(dev, old_dev); - - err = netdev_upper_dev_link(new_dev, dev, extack); + err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv, + extack); if (err) { if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); @@ -7857,6 +7910,11 @@ void netdev_adjacent_change_commit(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + if (!new_dev || !old_dev) return; @@ -7864,7 +7922,7 @@ void netdev_adjacent_change_commit(struct net_device *old_dev, return; netdev_adjacent_dev_enable(dev, old_dev); - netdev_upper_dev_unlink(old_dev, dev); + __netdev_upper_dev_unlink(old_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_commit); @@ -7872,13 +7930,18 @@ void netdev_adjacent_change_abort(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = NULL, + }; + if (!new_dev) return; if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); - netdev_upper_dev_unlink(new_dev, dev); + __netdev_upper_dev_unlink(new_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_abort); @@ -8647,7 +8710,7 @@ int dev_get_port_parent_id(struct net_device *dev, if (!first.id_len) first = *ppid; else if (memcmp(&first, ppid, sizeof(*ppid))) - return -ENODATA; + return -EOPNOTSUPP; } return err; @@ -10062,6 +10125,19 @@ static void netdev_wait_allrefs(struct net_device *dev) void netdev_run_todo(void) { struct list_head list; +#ifdef CONFIG_LOCKDEP + struct list_head unlink_list; + + list_replace_init(&net_unlink_list, &unlink_list); + + while (!list_empty(&unlink_list)) { + struct net_device *dev = list_first_entry(&unlink_list, + struct net_device, + unlink_list); + list_del(&dev->unlink_list); + dev->nested_level = dev->lower_level - 1; + } +#endif /* Snapshot list, allow later requests */ list_replace_init(&net_todo_list, &list); @@ -10274,6 +10350,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_segs = GSO_MAX_SEGS; dev->upper_level = 1; dev->lower_level = 1; +#ifdef CONFIG_LOCKDEP + dev->nested_level = 0; + INIT_LIST_HEAD(&dev->unlink_list); +#endif INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 54cd568e7c2f..fa1c37ec40c9 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -700,7 +700,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) * larger. */ netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -867,7 +867,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -897,7 +897,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -922,7 +922,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); diff --git a/net/core/dst.c b/net/core/dst.c index d6b6ced0d451..0c01bd8d9d81 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -144,7 +144,7 @@ static void dst_destroy_rcu(struct rcu_head *head) /* Operations to mark dst as DEAD and clean up the net device referenced * by dst: - * 1. put the dst under loopback interface and discard all tx/rx packets + * 1. put the dst under blackhole interface and discard all tx/rx packets * on this route. * 2. release the net_device * This function should be called when removing routes from the fib tree diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 51678a528f85..7bcfb16854cb 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -16,7 +16,7 @@ #include <net/ip_tunnels.h> #include <linux/indirect_call_wrapper.h> -#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#if defined(CONFIG_IPV6) && defined(CONFIG_IPV6_MULTIPLE_TABLES) #ifdef CONFIG_IP_MULTIPLE_TABLES #define INDIRECT_CALL_MT(f, f2, f1, ...) \ INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__) diff --git a/net/core/filter.c b/net/core/filter.c index 1f647ab986b6..b5f3faac5e3b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4838,6 +4838,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl4.saddr = params->ipv4_src; fl4.fl4_sport = params->sport; fl4.fl4_dport = params->dport; + fl4.flowi4_multipath_hash = 0; if (flags & BPF_FIB_LOOKUP_DIRECT) { u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; @@ -7065,8 +7066,6 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig, bool indirect = BPF_MODE(orig->code) == BPF_IND; struct bpf_insn *insn = insn_buf; - /* We're guaranteed here that CTX is in R6. */ - *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); if (!indirect) { *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); } else { @@ -7074,6 +7073,8 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig, if (orig->imm) *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); } + /* We're guaranteed here that CTX is in R6. */ + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); switch (BPF_SIZE(orig->code)) { case BPF_B: @@ -9522,7 +9523,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) * trigger an explicit type generation here. */ BTF_TYPE_EMIT(struct tcp6_sock); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk->sk_family == AF_INET6) return (unsigned long)sk; @@ -9540,7 +9541,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) { - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) return (unsigned long)sk; return (unsigned long)NULL; @@ -9557,13 +9558,19 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) { + /* BTF types for tcp_timewait_sock and inet_timewait_sock are not + * generated if CONFIG_INET=n. Trigger an explicit generation here. + */ + BTF_TYPE_EMIT(struct inet_timewait_sock); + BTF_TYPE_EMIT(struct tcp_timewait_sock); + #ifdef CONFIG_INET - if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) + if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif #if IS_BUILTIN(CONFIG_IPV6) - if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) + if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif @@ -9582,12 +9589,12 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) { #ifdef CONFIG_INET - if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif #if IS_BUILTIN(CONFIG_IPV6) - if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif @@ -9609,7 +9616,7 @@ BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk) * trigger an explicit type generation here. */ BTF_TYPE_EMIT(struct udp6_sock); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6) return (unsigned long)sk; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dcd61aca343e..944ab214e5ae 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -251,10 +251,10 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) if (refcount_read(&net->count) == 0) return NETNSA_NSID_NOT_ASSIGNED; - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); if (id >= 0) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); return id; } @@ -264,12 +264,12 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) * just been idr_remove()'d from there in cleanup_net(). */ if (!maybe_get_net(peer)) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); return NETNSA_NSID_NOT_ASSIGNED; } id = alloc_netid(net, peer, -1); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); put_net(peer); if (id < 0) @@ -534,20 +534,20 @@ static void unhash_nsid(struct net *net, struct net *last) for_each_net(tmp) { int id; - spin_lock(&tmp->nsid_lock); + spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock(&tmp->nsid_lock); + spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, GFP_KERNEL); if (tmp == last) break; } - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); } static LLIST_HEAD(cleanup_list); @@ -760,9 +760,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(peer); } - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); err = -EEXIST; NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, @@ -771,7 +771,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, } err = alloc_netid(net, peer, nsid); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, nlh, GFP_KERNEL); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6faf73d6a0f7..2b48cb0cc684 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5622,7 +5622,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, lse->label_stack_entry = mpls_lse; skb_postpush_rcsum(skb, lse, MPLS_HLEN); - if (ethernet) + if (ethernet && mac_len >= ETH_HLEN) skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); skb->protocol = mpls_proto; @@ -5662,7 +5662,7 @@ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); - if (ethernet) { + if (ethernet && mac_len >= ETH_HLEN) { struct ethhdr *hdr; /* use mpls_hdr() to get ethertype to account for VLANs. */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 84dde5a2066e..16014ad19406 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1426,6 +1426,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, { const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int prio; int err; if (!ops) @@ -1475,6 +1476,13 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, struct dcbnl_buffer *buffer = nla_data(ieee[DCB_ATTR_DCB_BUFFER]); + for (prio = 0; prio < ARRAY_SIZE(buffer->prio2buffer); prio++) { + if (buffer->prio2buffer[prio] >= DCBX_MAX_BUFFERS) { + err = -EINVAL; + goto err; + } + } + err = ops->dcbnl_setbuffer(netdev, buffer); if (err) goto err; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9af1a2d0cec4..16e5f98d4882 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1799,15 +1799,27 @@ int dsa_slave_create(struct dsa_port *port) dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); - ret = register_netdev(slave_dev); + rtnl_lock(); + + ret = register_netdevice(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", ret, slave_dev->name); + rtnl_unlock(); goto out_phy; } + ret = netdev_upper_dev_link(master, slave_dev, NULL); + + rtnl_unlock(); + + if (ret) + goto out_unregister; + return 0; +out_unregister: + unregister_netdev(slave_dev); out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); @@ -1824,16 +1836,18 @@ out_free: void dsa_slave_destroy(struct net_device *slave_dev) { + struct net_device *master = dsa_slave_to_master(slave_dev); struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); netif_carrier_off(slave_dev); rtnl_lock(); + netdev_upper_dev_unlink(master, slave_dev); + unregister_netdevice(slave_dev); phylink_disconnect_phy(dp->pl); rtnl_unlock(); dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); - unregister_netdev(slave_dev); phylink_destroy(dp->pl); gro_cells_destroy(&p->gcells); free_percpu(p->stats64); diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 42f327c06dca..b4fc05cafaa6 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -160,11 +160,14 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct sk_buff *clone = DSA_SKB_CB(skb)->clone; + rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + /* Retrieve timestamp ID populated inside skb->cb[0] of the + * clone by ocelot_port_add_txtstamp_skb + */ + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + rew_op |= clone->cb[0] << 3; packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0); } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5c2072765be7..0c3f54baec4e 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -866,7 +866,7 @@ static const struct genl_multicast_group ethtool_nl_mcgrps[] = { [ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME }, }; -static struct genl_family ethtool_genl_family = { +static struct genl_family ethtool_genl_family __ro_after_init = { .name = ETHTOOL_GENL_NAME, .version = ETHTOOL_GENL_VERSION, .netnsok = true, diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c index 84f23289475b..d93bf2da0f34 100644 --- a/net/ethtool/tunnels.c +++ b/net/ethtool/tunnels.c @@ -200,7 +200,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, req_info.dev, - ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, ETHTOOL_A_TUNNEL_INFO_HEADER, info, &reply_payload); if (!rskb) { @@ -273,7 +273,7 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) goto cont; ehdr = ethnl_dump_put(skb, cb, - ETHTOOL_MSG_TUNNEL_INFO_GET); + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY); if (!ehdr) { ret = -EMSGSIZE; goto out; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 06c3cd988760..0e4681cf71db 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -76,7 +76,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]); if (proto >= HSR_PROTOCOL_MAX) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol\n"); + NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol"); return -EINVAL; } @@ -84,14 +84,14 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, proto_version = HSR_V0; } else { if (proto == HSR_PROTOCOL_PRP) { - NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported\n"); + NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported"); return -EINVAL; } proto_version = nla_get_u8(data[IFLA_HSR_VERSION]); if (proto_version > HSR_V1) { NL_SET_ERR_MSG_MOD(extack, - "Only HSR version 0/1 supported\n"); + "Only HSR version 0/1 supported"); return -EINVAL; } } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 41079490a118..86a23e4a6a50 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -362,6 +362,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4a98dd736270..f1bd95f243b3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -186,8 +186,8 @@ errout: } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); -static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr **req_nlas) +static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr **req_nlas) { struct nlattr *nla; int remaining; @@ -195,9 +195,13 @@ static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) { int type = nla_type(nla); + if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32)) + return -EINVAL; + if (type < __INET_DIAG_REQ_MAX) req_nlas[type] = nla; } + return 0; } static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req, @@ -574,7 +578,10 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, int err, protocol; memset(&dump_data, 0, sizeof(dump_data)); - inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); + err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); + if (err) + return err; + protocol = inet_diag_get_protocol(req, &dump_data); handler = inet_diag_lock_handler(protocol); @@ -1180,8 +1187,11 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) if (!cb_data) return -ENOMEM; - inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); - + err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); + if (err) { + kfree(cb_data); + return err; + } nla = cb_data->inet_diag_nla_bc; if (nla) { err = inet_diag_bc_audit(nla, skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 61f802d5350c..e6f2ada9e7d5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -74,6 +74,7 @@ #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> +#include <net/inet_ecn.h> #include <net/lwtunnel.h> #include <linux/bpf-cgroup.h> #include <linux/igmp.h> @@ -1703,7 +1704,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, if (IS_ERR(rt)) return; - inet_sk(sk)->tos = arg->tos; + inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 75c6013ff9a4..b2ea1a8c5fd6 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -554,6 +554,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr, attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; md->gbp = nla_get_u32(attr); + md->gbp &= VXLAN_GBP_MASK; info->key.tun_flags |= TUNNEL_VXLAN_OPT; } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 49daaed89764..f687abb069fa 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -490,6 +490,7 @@ static struct xfrm_tunnel vti_ipip_handler __read_mostly = { .priority = 0, }; +#if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { .handler = vti_rcv_tunnel, .cb_handler = vti_rcv_cb, @@ -497,6 +498,7 @@ static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { .priority = 0, }; #endif +#endif static int __net_init vti_init_net(struct net *net) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1074df726ec0..8d5e1695b9aa 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -293,6 +293,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH), SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS), + SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8ca6bcab7b03..58642b29a499 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -786,8 +786,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { - struct fib_nh_common *nhc = FIB_RES_NHC(res); + struct fib_nh_common *nhc; + fib_select_path(net, &res, fl4, skb); + nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); @@ -1013,6 +1015,7 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + struct net *net = dev_net(dst->dev); u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; @@ -1033,9 +1036,11 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { - struct fib_nh_common *nhc = FIB_RES_NHC(res); + if (fib_lookup(net, fl4, &res, 0) == 0) { + struct fib_nh_common *nhc; + fib_select_path(net, &res, fl4, NULL); + nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } @@ -2147,6 +2152,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; @@ -2667,8 +2673,6 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, fib_select_path(net, res, fl4, skb); dev_out = FIB_RES_DEV(*res); - fl4->flowi4_oif = dev_out->ifindex; - make_route: rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f0794f0232ba..e03756631541 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,7 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, sock_rps_save_rxhash(child, skb); if (rsk_drop_req(req)) { - refcount_set(&req->rsk_refcnt, 2); + reqsk_put(req); return child; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 31f3b858db81..2135ee7c806d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (IS_ENABLED(CONFIG_DEBUG_VM) && - WARN_ONCE(PageSlab(page), "page must not be a Slab one")) + WARN_ONCE(!sendpage_ok(page), + "page must not be a Slab one and have page_count > 0")) return -EINVAL; /* Wait for a connection to finish. One exception is TCP Fast Open diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 184ea556f50e..b1ce2054291d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -885,21 +885,34 @@ struct tcp_sacktag_state { struct rate_sample *rate; }; -/* Take a notice that peer is sending D-SACKs */ +/* Take a notice that peer is sending D-SACKs. Skip update of data delivery + * and spurious retransmission information if this DSACK is unlikely caused by + * sender's action: + * - DSACKed sequence range is larger than maximum receiver's window. + * - Total no. of DSACKed segments exceed the total no. of retransmitted segs. + */ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, u32 end_seq, struct tcp_sacktag_state *state) { u32 seq_len, dup_segs = 1; - if (before(start_seq, end_seq)) { - seq_len = end_seq - start_seq; - if (seq_len > tp->mss_cache) - dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); - } + if (!before(start_seq, end_seq)) + return 0; + + seq_len = end_seq - start_seq; + /* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */ + if (seq_len > tp->max_window) + return 0; + if (seq_len > tp->mss_cache) + dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); + + tp->dsack_dups += dup_segs; + /* Skip the DSACK if dup segs weren't retransmitted by sender */ + if (tp->dsack_dups > tp->total_retrans) + return 0; tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; tp->rack.dsack_seen = 1; - tp->dsack_dups += dup_segs; state->flag |= FLAG_DSACKING_ACK; /* A spurious retransmission is delivered */ @@ -1153,6 +1166,11 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state); + if (!dup_segs) { /* Skip dubious DSACK */ + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS); + return false; + } + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs); /* D-SACK for already forgotten data... Do dumb counting. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5084333b5ab6..592c73962723 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1788,12 +1788,12 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, hdrlen); if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) { - thtail->window = th->window; - TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq; - if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) + if (likely(!before(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))) { TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + thtail->window = th->window; + } /* We have to update both TCP_SKB_CB(tail)->tcp_flags and * thtail->fin, so that the fast path in tcp_rcv_established() diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 76bff79d6fed..747f56e0c636 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -303,6 +303,7 @@ config IPV6_SEG6_LWTUNNEL config IPV6_SEG6_HMAC bool "IPv6: Segment Routing HMAC support" depends on IPV6 + select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 select CRYPTO_SHA256 diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 25a90f3f705c..4a664ad4f4d4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1993,14 +1993,19 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, /* Need to own table->tb6_lock */ int fib6_del(struct fib6_info *rt, struct nl_info *info) { - struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, - lockdep_is_held(&rt->fib6_table->tb6_lock)); - struct fib6_table *table = rt->fib6_table; struct net *net = info->nl_net; struct fib6_info __rcu **rtp; struct fib6_info __rcu **rtp_next; + struct fib6_table *table; + struct fib6_node *fn; + + if (rt == net->ipv6.fib6_null_entry) + return -ENOENT; - if (!fn || rt == net->ipv6.fib6_null_entry) + table = rt->fib6_table; + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&table->tb6_lock)); + if (!fn) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5e7e25e2523a..fb075d9545b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4202,7 +4202,7 @@ static struct fib6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, + cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 314973033d03..26d2f8ba7029 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -560,7 +560,9 @@ static int ieee80211_fill_rx_status(struct ieee80211_rx_status *stat, if (rate->idx < 0 || !rate->count) return -1; - if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) + if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) + stat->bw = RATE_INFO_BW_160; + else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) stat->bw = RATE_INFO_BW_80; else if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) stat->bw = RATE_INFO_BW_40; @@ -668,20 +670,26 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, * This will not be very accurate, but much better than simply * assuming un-aggregated tx in all cases. */ - if (duration > 400) /* <= VHT20 MCS2 1S */ + if (duration > 400 * 1024) /* <= VHT20 MCS2 1S */ agg_shift = 1; - else if (duration > 250) /* <= VHT20 MCS3 1S or MCS1 2S */ + else if (duration > 250 * 1024) /* <= VHT20 MCS3 1S or MCS1 2S */ agg_shift = 2; - else if (duration > 150) /* <= VHT20 MCS5 1S or MCS3 2S */ + else if (duration > 150 * 1024) /* <= VHT20 MCS5 1S or MCS2 2S */ agg_shift = 3; - else + else if (duration > 70 * 1024) /* <= VHT20 MCS5 2S */ agg_shift = 4; + else if (stat.encoding != RX_ENC_HE || + duration > 20 * 1024) /* <= HE40 MCS6 2S */ + agg_shift = 5; + else + agg_shift = 6; duration *= len; duration /= AVG_PKT_SIZE; duration /= 1024; + duration += (overhead >> agg_shift); - return duration + (overhead >> agg_shift); + return max_t(u32, duration, 4); } if (!conf) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ac870309b911..2e400b0ff696 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4861,6 +4861,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; struct cfg80211_chan_def chandef; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; + bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; struct ieee80211_bss *bss = (void *)cbss->priv; int ret; u32 i; @@ -4879,7 +4880,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - if (!sband->vht_cap.vht_supported && !is_6ghz) { + if (!sband->vht_cap.vht_supported && is_5ghz) { ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 836cde516a18..a959ebf56852 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -451,7 +451,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (status->bw == RATE_INFO_BW_5) channel_flags |= IEEE80211_CHAN_QUARTER; - if (status->band == NL80211_BAND_5GHZ) + if (status->band == NL80211_BAND_5GHZ || + status->band == NL80211_BAND_6GHZ) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ; else if (status->encoding != RX_ENC_LEGACY) channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c8504ffc71a1..8d3bfc0fe176 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3353,9 +3353,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, he_chandef.center_freq1 = ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0, NL80211_BAND_6GHZ); - he_chandef.center_freq2 = - ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, - NL80211_BAND_6GHZ); + if (support_80_80 || support_160) + he_chandef.center_freq2 = + ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, + NL80211_BAND_6GHZ); } if (!cfg80211_chandef_valid(&he_chandef)) { diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 9c6045f9c24d..d1b64d0751f2 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -168,10 +168,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* take some capabilities as-is */ cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); vht_cap->cap = cap_info; - vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | - IEEE80211_VHT_CAP_RXLDPC | + vht_cap->cap &= IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_VHT_TXOP_PS | IEEE80211_VHT_CAP_HTC_VHT | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | @@ -180,6 +177,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + vht_cap->cap |= min_t(u32, cap_info & IEEE80211_VHT_CAP_MAX_MPDU_MASK, + own_cap.cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK); + /* and some based on our own capabilities */ switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index ab52811523e9..c829e4a75325 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -34,11 +34,11 @@ void ieee802154_xmit_worker(struct work_struct *work) if (res) goto err_tx; - ieee802154_xmit_complete(&local->hw, skb, false); - dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; + ieee802154_xmit_complete(&local->hw, skb, false); + return; err_tx: @@ -78,6 +78,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) /* async is priority, otherwise sync is fallback */ if (local->ops->xmit_async) { + unsigned int len = skb->len; + ret = drv_xmit_async(local, skb); if (ret) { ieee802154_wake_queue(&local->hw); @@ -85,7 +87,7 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) } dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + dev->stats.tx_bytes += len; } else { local->tx_skb = skb; queue_work(local->workqueue, &local->tx_work); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 7fa822b55c34..888bbbbb3e8a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -451,7 +451,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, struct sk_buff *skb, struct mptcp_ext *ext) { - u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq); + /* The write_seq value has already been incremented, so the actual + * sequence number for the DATA_FIN is one less. + */ + u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1; if (!ext->use_map || !skb->len) { /* RFC6824 requires a DSS mapping with specific values @@ -460,10 +463,7 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, ext->data_fin = 1; ext->use_map = 1; ext->dsn64 = 1; - /* The write_seq value has already been incremented, so - * the actual sequence number for the DATA_FIN is one less. - */ - ext->data_seq = data_fin_tx_seq - 1; + ext->data_seq = data_fin_tx_seq; ext->subflow_seq = 0; ext->data_len = 1; } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) { @@ -518,11 +518,11 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, if (subflow->use_64bit_ack) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; - opts->ext_copy.data_ack = msk->ack_seq; + opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq); opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; - opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq); + opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq); opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; @@ -782,7 +782,7 @@ static void update_una(struct mptcp_sock *msk, } } -bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq) +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) { /* Skip if DATA_FIN was already received. * If updating simultaneously with the recvmsg loop, values @@ -792,7 +792,8 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq) if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first)) return false; - WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq); + WRITE_ONCE(msk->rcv_data_fin_seq, + expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); WRITE_ONCE(msk->rcv_data_fin, 1); return true; @@ -875,7 +876,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && - mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) && + mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) && schedule_work(&msk->work)) sock_hold(subflow->conn); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c8820c4156e6..770da3627848 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -66,6 +66,16 @@ static bool addresses_equal(const struct mptcp_addr_info *a, return a->port == b->port; } +static bool address_zero(const struct mptcp_addr_info *addr) +{ + struct mptcp_addr_info zero; + + memset(&zero, 0, sizeof(zero)); + zero.family = addr->family; + + return addresses_equal(addr, &zero, false); +} + static void local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { @@ -171,9 +181,9 @@ static void check_work_pending(struct mptcp_sock *msk) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { + struct mptcp_addr_info remote = { 0 }; struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *local; - struct mptcp_addr_info remote; struct pm_nl_pernet *pernet; pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); @@ -323,10 +333,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) * addr */ local_address((struct sock_common *)msk, &msk_local); - local_address((struct sock_common *)msk, &skc_local); + local_address((struct sock_common *)skc, &skc_local); if (addresses_equal(&msk_local, &skc_local, false)) return 0; + if (address_zero(&skc_local)) + return 0; + pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); rcu_read_lock(); @@ -341,7 +354,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return ret; /* address not found, add to local list */ - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 365ba96c84b0..5d747c6a610e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -123,7 +123,7 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_ext_reset(skb); skb_orphan(skb); - msk->ack_seq += copy_len; + WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); tail = skb_peek_tail(&sk->sk_receive_queue); if (offset == 0 && tail) { @@ -261,7 +261,7 @@ static void mptcp_check_data_fin(struct sock *sk) if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) { struct mptcp_subflow_context *subflow; - msk->ack_seq++; + WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); WRITE_ONCE(msk->rcv_data_fin, 0); sk->sk_shutdown |= RCV_SHUTDOWN; @@ -1720,7 +1720,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; - msk->ack_seq = ack_seq; + WRITE_ONCE(msk->ack_seq, ack_seq); } sock_reset_flag(nsk, SOCK_RCU_FREE); @@ -2072,7 +2072,7 @@ bool mptcp_finish_join(struct sock *sk) parent_sock = READ_ONCE(parent->sk_socket); if (parent_sock && !sk->sk_socket) mptcp_sock_graft(sk, parent_sock); - subflow->map_seq = msk->ack_seq; + subflow->map_seq = READ_ONCE(msk->ack_seq); return true; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 60b27d44c184..20f04ac85409 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -387,7 +387,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); void mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); -bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq); +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e8cac2655c82..6f035af1c9d2 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -731,7 +731,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (mpext->data_fin == 1) { if (data_len == 1) { - mptcp_update_rcv_data_fin(msk, mpext->data_seq); + bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq, + mpext->dsn64); pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq); if (subflow->map_valid) { /* A DATA_FIN might arrive in a DSS @@ -742,11 +743,23 @@ static enum mapping_status get_mapping_status(struct sock *ssk, skb_ext_del(skb, SKB_EXT_MPTCP); return MAPPING_OK; } else { + if (updated && schedule_work(&msk->work)) + sock_hold((struct sock *)msk); + return MAPPING_DATA_FIN; } } else { - mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len); - pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len); + u64 data_fin_seq = mpext->data_seq + data_len - 1; + + /* If mpext->data_seq is a 32-bit value, data_fin_seq + * must also be limited to 32 bits. + */ + if (!mpext->dsn64) + data_fin_seq &= GENMASK_ULL(31, 0); + + mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); + pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d", + data_fin_seq, mpext->dsn64); } /* Adjust for DATA_FIN using 1 byte of sequence space */ @@ -1063,6 +1076,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; struct sockaddr_storage addr; + int remote_id = remote->id; int local_id = loc->id; struct socket *sf; struct sock *ssk; @@ -1107,10 +1121,11 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, goto failed; mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL); - pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token, - local_id); + pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, + remote_token, local_id, remote_id); subflow->remote_token = remote_token; subflow->local_id = local_id; + subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = 1; mptcp_info2sockaddr(remote, &addr); @@ -1347,6 +1362,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->backup = subflow_req->backup; new_ctx->local_id = subflow_req->local_id; + new_ctx->remote_id = subflow_req->remote_id; new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 832eabecfbdd..c3a4214dc958 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -851,7 +851,6 @@ static int ctnetlink_done(struct netlink_callback *cb) } struct ctnetlink_filter { - u_int32_t cta_flags; u8 family; u_int32_t orig_flags; @@ -906,10 +905,6 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], struct nf_conntrack_zone *zone, u_int32_t flags); -/* applied on filters */ -#define CTA_FILTER_F_CTA_MARK (1 << 0) -#define CTA_FILTER_F_CTA_MARK_MASK (1 << 1) - static struct ctnetlink_filter * ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) { @@ -930,14 +925,10 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) #ifdef CONFIG_NF_CONNTRACK_MARK if (cda[CTA_MARK]) { filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK); - - if (cda[CTA_MARK_MASK]) { + if (cda[CTA_MARK_MASK]) filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); - filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK_MASK); - } else { + else filter->mark.mask = 0xffffffff; - } } else if (cda[CTA_MARK_MASK]) { err = -EINVAL; goto err_filter; @@ -1117,11 +1108,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK_MASK)) && - (ct->mark & filter->mark.mask) != filter->mark.val) - goto ignore_entry; - else if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK)) && - ct->mark != filter->mark.val) + if ((ct->mark & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif @@ -1404,7 +1391,8 @@ ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], if (err < 0) return err; - + if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6) + return -EOPNOTSUPP; tuple->src.l3num = l3num; if (flags & CTA_FILTER_FLAG(CTA_IP_DST) || diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 95f79980348c..47e9319d2cf3 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -565,6 +565,7 @@ static int nf_ct_netns_inet_get(struct net *net) int err; err = nf_ct_netns_do_get(net, NFPROTO_IPV4); +#if IS_ENABLED(CONFIG_IPV6) if (err < 0) goto err1; err = nf_ct_netns_do_get(net, NFPROTO_IPV6); @@ -575,6 +576,7 @@ static int nf_ct_netns_inet_get(struct net *net) err2: nf_ct_netns_put(net, NFPROTO_IPV4); err1: +#endif return err; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b7dc1cbf40ea..4603b667973a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -684,6 +684,18 @@ nla_put_failure: return -1; } +struct nftnl_skb_parms { + bool report; +}; +#define NFT_CB(skb) (*(struct nftnl_skb_parms*)&((skb)->cb)) + +static void nft_notify_enqueue(struct sk_buff *skb, bool report, + struct list_head *notify_list) +{ + NFT_CB(skb).report = report; + list_add_tail(&skb->list, notify_list); +} + static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; @@ -715,8 +727,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -1468,8 +1479,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -2807,8 +2817,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -3837,8 +3846,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, - gfp_flags); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -4959,8 +4967,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, - GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -6275,7 +6282,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + nft_notify_enqueue(skb, report, &net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7085,8 +7092,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7695,6 +7701,41 @@ static void nf_tables_commit_release(struct net *net) mutex_unlock(&net->nft.commit_mutex); } +static void nft_commit_notify(struct net *net, u32 portid) +{ + struct sk_buff *batch_skb = NULL, *nskb, *skb; + unsigned char *data; + int len; + + list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) { + if (!batch_skb) { +new_batch: + batch_skb = skb; + len = NLMSG_GOODSIZE - skb->len; + list_del(&skb->list); + continue; + } + len -= skb->len; + if (len > 0 && NFT_CB(skb).report == NFT_CB(batch_skb).report) { + data = skb_put(batch_skb, skb->len); + memcpy(data, skb->data, skb->len); + list_del(&skb->list); + kfree_skb(skb); + continue; + } + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + goto new_batch; + } + + if (batch_skb) { + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + } + + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -7897,6 +7938,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_release(net); @@ -8721,6 +8763,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); INIT_LIST_HEAD(&net->nft.module_list); + INIT_LIST_HEAD(&net->nft.notify_list); mutex_init(&net->nft.commit_mutex); net->nft.base_seq = 1; net->nft.validate_state = NFT_VALIDATE_SKIP; @@ -8737,6 +8780,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft.module_list)); + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); } static struct pernet_operations nf_tables_net_ops = { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 7bc6537f3ccb..b37bd02448d8 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -147,11 +147,11 @@ nft_meta_get_eval_skugid(enum nft_meta_keys key, switch (key) { case NFT_META_SKUID: - *dest = from_kuid_munged(&init_user_ns, + *dest = from_kuid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsuid); break; case NFT_META_SKGID: - *dest = from_kgid_munged(&init_user_ns, + *dest = from_kgid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsgid); break; default: diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1eb65a7a27fd..c4b4d3376227 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1079,7 +1079,7 @@ static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; - while (netlink_policy_dump_loop(&cb->args[1])) { + while (netlink_policy_dump_loop(cb->args[1])) { void *hdr; struct nlattr *nest; @@ -1113,6 +1113,12 @@ nla_put_failure: return skb->len; } +static int ctrl_dumppolicy_done(struct netlink_callback *cb) +{ + netlink_policy_dump_free(cb->args[1]); + return 0; +} + static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, @@ -1123,6 +1129,7 @@ static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETPOLICY, .dumpit = ctrl_dumppolicy, + .done = ctrl_dumppolicy_done, }, }; diff --git a/net/netlink/policy.c b/net/netlink/policy.c index 641ffbdd977a..0176b59ce530 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -84,7 +84,6 @@ int netlink_policy_dump_start(const struct nla_policy *policy, unsigned int policy_idx; int err; - /* also returns 0 if "*_state" is our ERR_PTR() end marker */ if (*_state) return 0; @@ -140,21 +139,11 @@ static bool netlink_policy_dump_finished(struct nl_policy_dump *state) !state->policies[state->policy_idx].policy; } -bool netlink_policy_dump_loop(unsigned long *_state) +bool netlink_policy_dump_loop(unsigned long _state) { - struct nl_policy_dump *state = (void *)*_state; - - if (IS_ERR(state)) - return false; - - if (netlink_policy_dump_finished(state)) { - kfree(state); - /* store end marker instead of freed state */ - *_state = (unsigned long)ERR_PTR(-ENOENT); - return false; - } + struct nl_policy_dump *state = (void *)_state; - return true; + return !netlink_policy_dump_finished(state); } int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state) @@ -309,3 +298,10 @@ nla_put_failure: nla_nest_cancel(skb, policy); return -ENOBUFS; } + +void netlink_policy_dump_free(unsigned long _state) +{ + struct nl_policy_dump *state = (void *)_state; + + kfree(state); +} diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a3f1204f1ed2..12d42ab0193b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -905,15 +905,19 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); - if (err == NF_ACCEPT && - ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { - if (maniptype == NF_NAT_MANIP_SRC) - maniptype = NF_NAT_MANIP_DST; - else - maniptype = NF_NAT_MANIP_SRC; - - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, - maniptype); + if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { + if (ct->status & IPS_SRC_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, + maniptype); + } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, + NF_NAT_MANIP_SRC); + } } /* Mark NAT done if successful and update the flow key. */ diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index d8252fdab851..b8559c882431 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -199,17 +199,30 @@ static int announce_servers(struct sockaddr_qrtr *sq) if (!node) return 0; + rcu_read_lock(); /* Announce the list of servers registered in this node */ radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + rcu_read_unlock(); ret = service_announce_new(sq, srv); if (ret < 0) { pr_err("failed to announce new service\n"); return ret; } + + rcu_read_lock(); } + rcu_read_unlock(); + return 0; } @@ -344,11 +357,22 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) if (!node) return 0; + rcu_read_lock(); /* Advertise removal of this client to all servers of remote node */ radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + rcu_read_unlock(); server_del(node, srv->port); + rcu_read_lock(); } + rcu_read_unlock(); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); @@ -359,8 +383,17 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); pkt.client.node = cpu_to_le32(from->sq_node); + rcu_read_lock(); radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + rcu_read_unlock(); sq.sq_family = AF_QIPCRTR; sq.sq_node = srv->node; @@ -374,8 +407,11 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) pr_err("failed to send bye cmd\n"); return ret; } + rcu_read_lock(); } + rcu_read_unlock(); + return 0; } @@ -434,8 +470,17 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, pkt.client.node = cpu_to_le32(node_id); pkt.client.port = cpu_to_le32(port); + rcu_read_lock(); radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + rcu_read_unlock(); sq.sq_family = AF_QIPCRTR; sq.sq_node = srv->node; @@ -449,8 +494,11 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, pr_err("failed to send del client cmd\n"); return ret; } + rcu_read_lock(); } + rcu_read_unlock(); + return 0; } @@ -554,20 +602,40 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, filter.service = service; filter.instance = instance; + rcu_read_lock(); radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) { node = radix_tree_deref_slot(node_slot); + if (!node) + continue; + if (radix_tree_deref_retry(node)) { + node_slot = radix_tree_iter_retry(&node_iter); + continue; + } + node_slot = radix_tree_iter_resume(node_slot, &node_iter); radix_tree_for_each_slot(srv_slot, &node->servers, &srv_iter, 0) { struct qrtr_server *srv; srv = radix_tree_deref_slot(srv_slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + srv_slot = radix_tree_iter_retry(&srv_iter); + continue; + } + if (!server_match(srv, &filter)) continue; + srv_slot = radix_tree_iter_resume(srv_slot, &srv_iter); + + rcu_read_unlock(); lookup_notify(from, srv, true); + rcu_read_lock(); } } + rcu_read_unlock(); /* Empty notification, to indicate end of listing */ lookup_notify(from, NULL, true); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 90c558f89d46..957aa9263ba4 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -332,8 +332,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, { struct qrtr_hdr_v1 *hdr; size_t len = skb->len; - int rc = -ENODEV; - int confirm_rx; + int rc, confirm_rx; confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type); if (confirm_rx < 0) { @@ -357,15 +356,17 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->size = cpu_to_le32(len); hdr->confirm_rx = !!confirm_rx; - skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); - - mutex_lock(&node->ep_lock); - if (node->ep) - rc = node->ep->xmit(node->ep, skb); - else - kfree_skb(skb); - mutex_unlock(&node->ep_lock); + rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); + if (!rc) { + mutex_lock(&node->ep_lock); + rc = -ENODEV; + if (node->ep) + rc = node->ep->xmit(node->ep, skb); + else + kfree_skb(skb); + mutex_unlock(&node->ep_lock); + } /* Need to ensure that a subsequent message carries the otherwise lost * confirm_rx flag if we dropped this one */ if (rc && confirm_rx) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 884cff7bb169..97aebb5d19db 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -518,7 +518,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ - RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ @@ -714,8 +713,8 @@ struct rxrpc_ack_summary { enum rxrpc_command { RXRPC_CMD_SEND_DATA, /* send data message */ RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ + RXRPC_CMD_CHARGE_ACCEPT, /* [server] charge accept preallocation */ }; struct rxrpc_call_params { @@ -755,9 +754,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_sock *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, - rxrpc_notify_rx_t); -int rxrpc_reject_call(struct rxrpc_sock *); +int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); /* * call_event.c diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index ef160566aa9a..8df1964db333 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -39,8 +39,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, unsigned int debug_id) { const void *here = __builtin_return_address(0); - struct rxrpc_call *call; + struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); + struct rb_node *parent, **pp; int max, tmp; unsigned int size = RXRPC_BACKLOG_MAX; unsigned int head, tail, call_head, call_tail; @@ -94,7 +95,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, } /* Now it gets complicated, because calls get registered with the - * socket here, particularly if a user ID is preassigned by the user. + * socket here, with a user ID preassigned by the user. */ call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) @@ -107,34 +108,33 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, here, (const void *)user_call_ID); write_lock(&rx->call_lock); - if (user_attach_call) { - struct rxrpc_call *xcall; - struct rb_node *parent, **pp; - - /* Check the user ID isn't already in use */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < xcall->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > xcall->user_call_ID) - pp = &(*pp)->rb_right; - else - goto id_in_use; - } - call->user_call_ID = user_call_ID; - call->notify_rx = notify_rx; + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < xcall->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > xcall->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } + + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + if (user_attach_call) { rxrpc_get_call(call, rxrpc_call_got_kernel); user_attach_call(call, user_call_ID); - rxrpc_get_call(call, rxrpc_call_got_userid); - rb_link_node(&call->sock_node, parent, pp); - rb_insert_color(&call->sock_node, &rx->calls); - set_bit(RXRPC_CALL_HAS_USERID, &call->flags); } + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + list_add(&call->sock_link, &rx->sock_calls); write_unlock(&rx->call_lock); @@ -157,11 +157,8 @@ id_in_use: } /* - * Preallocate sufficient service connections, calls and peers to cover the - * entire backlog of a socket. When a new call comes in, if we don't have - * sufficient of each available, the call gets rejected as busy or ignored. - * - * The backlog is replenished when a connection is accepted or rejected. + * Allocate the preallocation buffers for incoming service calls. These must + * be charged manually. */ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) { @@ -174,13 +171,6 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) rx->backlog = b; } - if (rx->discard_new_call) - return 0; - - while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp, - atomic_inc_return(&rxrpc_debug_id)) == 0) - ; - return 0; } @@ -333,6 +323,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_see_call(call); call->conn = conn; call->security = conn->security; + call->security_ix = conn->security_ix; call->peer = rxrpc_get_peer(conn->params.peer); call->cong_cwnd = call->peer->cong_cwnd; return call; @@ -402,8 +393,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, if (rx->notify_new_call) rx->notify_new_call(&rx->sk, call, call->user_call_ID); - else - sk_acceptq_added(&rx->sk); spin_lock(&conn->state_lock); switch (conn->state) { @@ -415,12 +404,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, case RXRPC_CONN_SERVICE: write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - if (rx->discard_new_call) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - else - call->state = RXRPC_CALL_SERVER_ACCEPTING; - } + if (call->state < RXRPC_CALL_COMPLETE) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; write_unlock(&call->state_lock); break; @@ -440,9 +425,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, rxrpc_send_ping(call, skb); - if (call->state == RXRPC_CALL_SERVER_ACCEPTING) - rxrpc_notify_socket(call); - /* We have to discard the prealloc queue's ref here and rely on a * combination of the RCU read lock and refs held either by the socket * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel @@ -460,187 +442,18 @@ no_call: } /* - * handle acceptance of a call by userspace - * - assign the user call ID to the call at the front of the queue - * - called with the socket locked. + * Charge up socket with preallocated calls, attaching user call IDs. */ -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, - unsigned long user_call_ID, - rxrpc_notify_rx_t notify_rx) - __releases(&rx->sk.sk_lock.slock) - __acquires(call->user_mutex) +int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID) { - struct rxrpc_call *call; - struct rb_node *parent, **pp; - int ret; - - _enter(",%lx", user_call_ID); - - ASSERT(!irqs_disabled()); - - write_lock(&rx->call_lock); - - if (list_empty(&rx->to_be_accepted)) { - write_unlock(&rx->call_lock); - release_sock(&rx->sk); - kleave(" = -ENODATA [empty]"); - return ERR_PTR(-ENODATA); - } - - /* check the user ID isn't already in use */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - call = rb_entry(parent, struct rxrpc_call, sock_node); - - if (user_call_ID < call->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) - pp = &(*pp)->rb_right; - else - goto id_in_use; - } - - /* Dequeue the first call and check it's still valid. We gain - * responsibility for the queue's reference. - */ - call = list_entry(rx->to_be_accepted.next, - struct rxrpc_call, accept_link); - write_unlock(&rx->call_lock); - - /* We need to gain the mutex from the interrupt handler without - * upsetting lockdep, so we have to release it there and take it here. - * We are, however, still holding the socket lock, so other accepts - * must wait for us and no one can add the user ID behind our backs. - */ - if (mutex_lock_interruptible(&call->user_mutex) < 0) { - release_sock(&rx->sk); - kleave(" = -ERESTARTSYS"); - return ERR_PTR(-ERESTARTSYS); - } - - write_lock(&rx->call_lock); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - rxrpc_see_call(call); - - /* Find the user ID insertion point. */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - call = rb_entry(parent, struct rxrpc_call, sock_node); - - if (user_call_ID < call->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) - pp = &(*pp)->rb_right; - else - BUG(); - } - - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_SERVER_ACCEPTING: - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - break; - case RXRPC_CALL_COMPLETE: - ret = call->error; - goto out_release; - default: - BUG(); - } - - /* formalise the acceptance */ - call->notify_rx = notify_rx; - call->user_call_ID = user_call_ID; - rxrpc_get_call(call, rxrpc_call_got_userid); - rb_link_node(&call->sock_node, parent, pp); - rb_insert_color(&call->sock_node, &rx->calls); - if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) - BUG(); - - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - rxrpc_notify_socket(call); - rxrpc_service_prealloc(rx, GFP_KERNEL); - release_sock(&rx->sk); - _leave(" = %p{%d}", call, call->debug_id); - return call; - -out_release: - _debug("release %p", call); - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - goto out; - -id_in_use: - ret = -EBADSLT; - write_unlock(&rx->call_lock); -out: - rxrpc_service_prealloc(rx, GFP_KERNEL); - release_sock(&rx->sk); - _leave(" = %d", ret); - return ERR_PTR(ret); -} - -/* - * Handle rejection of a call by userspace - * - reject the call at the front of the queue - */ -int rxrpc_reject_call(struct rxrpc_sock *rx) -{ - struct rxrpc_call *call; - bool abort = false; - int ret; - - _enter(""); - - ASSERT(!irqs_disabled()); - - write_lock(&rx->call_lock); - - if (list_empty(&rx->to_be_accepted)) { - write_unlock(&rx->call_lock); - return -ENODATA; - } - - /* Dequeue the first call and check it's still valid. We gain - * responsibility for the queue's reference. - */ - call = list_entry(rx->to_be_accepted.next, - struct rxrpc_call, accept_link); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - rxrpc_see_call(call); + struct rxrpc_backlog *b = rx->backlog; - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED); - abort = true; - fallthrough; - case RXRPC_CALL_COMPLETE: - ret = call->error; - goto out_discard; - default: - BUG(); - } + if (rx->sk.sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; -out_discard: - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - if (abort) { - rxrpc_send_abort_packet(call); - rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - } - rxrpc_service_prealloc(rx, GFP_KERNEL); - _leave(" = %d", ret); - return ret; + return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID, + GFP_KERNEL, + atomic_inc_return(&rxrpc_debug_id)); } /* diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a40fae013942..ed49769b459d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -23,7 +23,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", - [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -352,9 +351,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->call_id = sp->hdr.callNumber; call->service_id = sp->hdr.serviceId; call->cid = sp->hdr.cid; - call->state = RXRPC_CALL_SERVER_ACCEPTING; - if (sp->hdr.securityIndex > 0) - call->state = RXRPC_CALL_SERVER_SECURING; + call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; /* Set the channel for this call. We don't get channel_lock as we're diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 447f55ca6886..64ace2960ecc 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -269,7 +269,7 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) if (call) { write_lock_bh(&call->state_lock); if (call->state == RXRPC_CALL_SERVER_SECURING) { - call->state = RXRPC_CALL_SERVER_ACCEPTING; + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; rxrpc_notify_socket(call); } write_unlock_bh(&call->state_lock); @@ -340,18 +340,18 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return ret; spin_lock(&conn->channel_lock); - spin_lock(&conn->state_lock); + spin_lock_bh(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; - spin_unlock(&conn->state_lock); + spin_unlock_bh(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->channel_lock))); } else { - spin_unlock(&conn->state_lock); + spin_unlock_bh(&conn->state_lock); } spin_unlock(&conn->channel_lock); diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 94c3df392651..2e8bd3b97301 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -903,7 +903,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen) _enter(""); - if (optlen <= 0 || optlen > PAGE_SIZE - 1) + if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities) return -EINVAL; description = memdup_sockptr_nul(optval, optlen); @@ -940,7 +940,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) if (IS_ERR(description)) return PTR_ERR(description); - key = request_key_net(&key_type_keyring, description, sock_net(&rx->sk), NULL); + key = request_key(&key_type_keyring, description, NULL); if (IS_ERR(key)) { kfree(description); _leave(" = %ld", PTR_ERR(key)); @@ -1072,7 +1072,7 @@ static long rxrpc_read(const struct key *key, switch (token->security_index) { case RXRPC_SECURITY_RXKAD: - toksize += 9 * 4; /* viceid, kvno, key*2 + len, begin, + toksize += 8 * 4; /* viceid, kvno, key*2, begin, * end, primary, tktlen */ toksize += RND(token->kad->ticket_len); break; @@ -1107,7 +1107,8 @@ static long rxrpc_read(const struct key *key, break; default: /* we have a ticket we can't encode */ - BUG(); + pr_err("Unsupported key token type (%u)\n", + token->security_index); continue; } @@ -1138,6 +1139,14 @@ static long rxrpc_read(const struct key *key, memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ xdr += (_l + 3) >> 2; \ } while(0) +#define ENCODE_BYTES(l, s) \ + do { \ + u32 _l = (l); \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ + xdr += (_l + 3) >> 2; \ + } while(0) #define ENCODE64(x) \ do { \ __be64 y = cpu_to_be64(x); \ @@ -1165,7 +1174,7 @@ static long rxrpc_read(const struct key *key, case RXRPC_SECURITY_RXKAD: ENCODE(token->kad->vice_id); ENCODE(token->kad->kvno); - ENCODE_DATA(8, token->kad->session_key); + ENCODE_BYTES(8, token->kad->session_key); ENCODE(token->kad->start); ENCODE(token->kad->expiry); ENCODE(token->kad->primary_flag); @@ -1215,7 +1224,6 @@ static long rxrpc_read(const struct key *key, break; default: - BUG(); break; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index c4684dde1f16..2c842851d72e 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -179,37 +179,6 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) } /* - * Pass back notification of a new call. The call is added to the - * to-be-accepted list. This means that the next call to be accepted might not - * be the last call seen awaiting acceptance, but unless we leave this on the - * front of the queue and block all other messages until someone gives us a - * user_ID for it, there's not a lot we can do. - */ -static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, int flags) -{ - int tmp = 0, ret; - - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); - - if (ret == 0 && !(flags & MSG_PEEK)) { - _debug("to be accepted"); - write_lock_bh(&rx->recvmsg_lock); - list_del_init(&call->recvmsg_link); - write_unlock_bh(&rx->recvmsg_lock); - - rxrpc_get_call(call, rxrpc_call_got); - write_lock(&rx->call_lock); - list_add_tail(&call->accept_link, &rx->to_be_accepted); - write_unlock(&rx->call_lock); - } - - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); - return ret; -} - -/* * End the packet reception phase. */ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) @@ -630,9 +599,6 @@ try_again: } switch (READ_ONCE(call->state)) { - case RXRPC_CALL_SERVER_ACCEPTING: - ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); - break; case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: @@ -728,7 +694,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, call->debug_id, rxrpc_call_states[call->state], iov_iter_count(iter), want_more); - ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING); mutex_lock(&call->user_mutex); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0824e103d037..d27140c836cc 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -530,10 +530,10 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) return -EINVAL; break; - case RXRPC_ACCEPT: + case RXRPC_CHARGE_ACCEPT: if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; - p->command = RXRPC_CMD_ACCEPT; + p->command = RXRPC_CMD_CHARGE_ACCEPT; if (len != 0) return -EINVAL; break; @@ -659,16 +659,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (ret < 0) goto error_release_sock; - if (p.command == RXRPC_CMD_ACCEPT) { + if (p.command == RXRPC_CMD_CHARGE_ACCEPT) { ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; - call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL); - /* The socket is now unlocked. */ - if (IS_ERR(call)) - return PTR_ERR(call); - ret = 0; - goto out_put_unlock; + ret = rxrpc_user_charge_accept(rx, p.call.user_call_ID); + goto error_release_sock; } call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); @@ -690,7 +686,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: - case RXRPC_CALL_SERVER_ACCEPTING: rxrpc_put_call(call, rxrpc_call_put); ret = -EBUSY; goto error_release_sock; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 063d8aaf2900..798430e1a79f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -235,6 +235,8 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, index++; if (index < s_i) continue; + if (IS_ERR(p)) + continue; if (jiffy_since && time_after(jiffy_since, @@ -307,6 +309,8 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { + if (IS_ERR(p)) + continue; ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) { module_put(ops->owner); @@ -467,17 +471,6 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, } EXPORT_SYMBOL(tcf_idr_create_from_flags); -void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) -{ - struct tcf_idrinfo *idrinfo = tn->idrinfo; - - mutex_lock(&idrinfo->lock); - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ - WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index))); - mutex_unlock(&idrinfo->lock); -} -EXPORT_SYMBOL(tcf_idr_insert); - /* Cleanup idr index that was allocated but not initialized. */ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) @@ -731,13 +724,6 @@ int tcf_action_destroy(struct tc_action *actions[], int bind) return ret; } -static int tcf_action_destroy_1(struct tc_action *a, int bind) -{ - struct tc_action *actions[] = { a, NULL }; - - return tcf_action_destroy(actions, bind); -} - static int tcf_action_put(struct tc_action *p) { return __tcf_action_put(p, false); @@ -902,6 +888,26 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; +static void tcf_idr_insert_many(struct tc_action *actions[]) +{ + int i; + + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { + struct tc_action *a = actions[i]; + struct tcf_idrinfo *idrinfo; + + if (!a) + continue; + idrinfo = a->idrinfo; + mutex_lock(&idrinfo->lock); + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if + * it is just created, otherwise this is just a nop. + */ + idr_replace(&idrinfo->action_idr, a, a->tcfa_index); + mutex_unlock(&idrinfo->lock); + } +} + struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -1002,13 +1008,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err != ACT_P_CREATED) module_put(a_o->owner); - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && - !rcu_access_pointer(a->goto_chain)) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); - return ERR_PTR(-EINVAL); - } - return a; err_mod: @@ -1051,6 +1050,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, actions[i - 1] = act; } + /* We have to commit them all together, because if any error happened in + * between, we could not handle the failure gracefully. + */ + tcf_idr_insert_many(actions); + *attr_size = tcf_action_full_attrs_size(sz); return i - 1; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 54d5652cfe6c..a4c7ba35a343 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -365,9 +365,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (res == ACT_P_CREATED) { - tcf_idr_insert(tn, *act); - } else { + if (res != ACT_P_CREATED) { /* make sure the program being replaced is no longer executing */ synchronize_rcu(); tcf_bpf_cfg_cleanup(&old); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index f901421b0634..e19885d7fe2c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -139,7 +139,6 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci->net = net; ci->zone = parm->zone; - tcf_idr_insert(tn, *a); ret = ACT_P_CREATED; } else if (ret > 0) { ci = to_connmark(*a); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index f5826e457679..4fa4fcb842ba 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -110,9 +110,6 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (params_new) kfree_rcu(params_new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 2c3619165680..a780afdf570d 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1297,8 +1297,6 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, tcf_chain_put_by_act(goto_ch); if (params) call_rcu(¶ms->rcu, tcf_ct_params_free); - if (res == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return res; diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index b5042f3ea079..6084300e51ad 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -269,9 +269,6 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, if (cp_new) kfree_rcu(cp_new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 410e3bbfb9ca..73c3926358a0 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -140,8 +140,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; release_idr: tcf_idr_release(*a, bind); diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 1fb8d428d2c1..7c0771dd77a3 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -437,9 +437,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; chain_put: diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c1fcd85719d6..a2ddea04183a 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -436,6 +436,25 @@ static void tcf_ife_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } +static int load_metalist(struct nlattr **tb, bool rtnl_held) +{ + int i; + + for (i = 1; i < max_metacnt; i++) { + if (tb[i]) { + void *val = nla_data(tb[i]); + int len = nla_len(tb[i]); + int rc; + + rc = load_metaops_and_vet(i, val, len, rtnl_held); + if (rc != 0) + return rc; + } + } + + return 0; +} + static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists, bool rtnl_held) { @@ -449,10 +468,6 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(i, val, len, rtnl_held); - if (rc != 0) - return rc; - rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; @@ -509,6 +524,21 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (!p) return -ENOMEM; + if (tb[TCA_IFE_METALST]) { + err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, + tb[TCA_IFE_METALST], NULL, + NULL); + if (err) { + kfree(p); + return err; + } + err = load_metalist(tb2, rtnl_held); + if (err) { + kfree(p); + return err; + } + } + index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) { @@ -570,15 +600,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, - tb[TCA_IFE_METALST], NULL, - NULL); - if (err) - goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; - } else { /* if no passed metadata allow list or passed allow-all * then here we process by adding as many supported metadatum @@ -603,9 +627,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; metadata_parse_err: if (goto_ch) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 400a2cfe8452..8dc3bec0d325 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -189,8 +189,6 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, ipt->tcfi_t = t; ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; err3: diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index b2705318993b..e24b7e2331cd 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -194,8 +194,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock(&mirred_list_lock); - - tcf_idr_insert(tn, *a); } return ret; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 8118e2640979..e298ec3b3c9e 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -273,8 +273,6 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 855a6fa16a62..1ebd2a86d980 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -93,9 +93,6 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; release_idr: tcf_idr_release(*a, bind); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c158bfed86d5..b45304446e13 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -238,8 +238,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0b431d493768..8d8452b1cdd4 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -201,8 +201,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (new) kfree_rcu(new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; failure: diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 5e2df590bb58..3ebf9ede3cf1 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -116,8 +116,6 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 9813ca4006dd..a4f3d0f0daa9 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -157,8 +157,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, goto release_idr; } - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index d0652386c6e2..e5f3fb8b00e3 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -225,8 +225,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 39e6d94cfafb..81a1c67335be 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -190,8 +190,6 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 536c4bc31be6..a229751ee8c4 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -156,6 +156,7 @@ tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len, struct vxlan_metadata *md = dst; md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]); + md->gbp &= VXLAN_GBP_MASK; } return sizeof(struct vxlan_metadata); @@ -536,9 +537,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index a5ff9f68ab02..163b0385fd4c 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -229,8 +229,6 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a4f7ef1de7e7..fed18fd2c50b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1175,8 +1175,10 @@ static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key, return -EINVAL; } - if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) + if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) { md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]); + md->gbp &= VXLAN_GBP_MASK; + } return sizeof(*md); } @@ -1221,6 +1223,7 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, } if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) { nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]; + memset(&md->u, 0x00, sizeof(md->u)); md->u.index = nla_get_be32(nla); } } else if (md->version == 2) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 265a61d011df..54c417244642 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1131,24 +1131,10 @@ EXPORT_SYMBOL(dev_activate); static void qdisc_deactivate(struct Qdisc *qdisc) { - bool nolock = qdisc->flags & TCQ_F_NOLOCK; - if (qdisc->flags & TCQ_F_BUILTIN) return; - if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state)) - return; - - if (nolock) - spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); - - qdisc_reset(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) - spin_unlock_bh(&qdisc->seqlock); } static void dev_deactivate_queue(struct net_device *dev, @@ -1165,6 +1151,30 @@ static void dev_deactivate_queue(struct net_device *dev, } } +static void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; + + qdisc = dev_queue->qdisc_sleeping; + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) + spin_unlock_bh(&qdisc->seqlock); +} + static bool some_qdisc_is_busy(struct net_device *dev) { unsigned int i; @@ -1213,12 +1223,20 @@ void dev_deactivate_many(struct list_head *head) dev_watchdog_down(dev); } - /* Wait for outstanding qdisc-less dev_queue_xmit calls. + /* Wait for outstanding qdisc-less dev_queue_xmit calls or + * outstanding qdisc enqueuing calls. * This is avoided if all devices are in dismantle phase : * Caller will call synchronize_net() for us */ synchronize_net(); + list_for_each_entry(dev, head, close_list) { + netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); + + if (dev_ingress_queue(dev)) + dev_reset_queue(dev, dev_ingress_queue(dev), NULL); + } + /* Wait for outstanding qdisc_run calls. */ list_for_each_entry(dev, head, close_list) { while (some_qdisc_is_busy(dev)) { diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index fe53c1e38c7d..b0ad7687ee2c 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -777,9 +777,11 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, }; -static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, +static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb, + struct sched_entry *entry, struct netlink_ext_ack *extack) { + int min_duration = length_to_duration(q, ETH_ZLEN); u32 interval = 0; if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) @@ -794,7 +796,10 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, interval = nla_get_u32( tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); - if (interval == 0) { + /* The interval should allow at least the minimum ethernet + * frame to go out. + */ + if (interval < min_duration) { NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); return -EINVAL; } @@ -804,8 +809,9 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, return 0; } -static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, - int index, struct netlink_ext_ack *extack) +static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n, + struct sched_entry *entry, int index, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; int err; @@ -819,10 +825,10 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, entry->index = index; - return fill_sched_entry(tb, entry, extack); + return fill_sched_entry(q, tb, entry, extack); } -static int parse_sched_list(struct nlattr *list, +static int parse_sched_list(struct taprio_sched *q, struct nlattr *list, struct sched_gate_list *sched, struct netlink_ext_ack *extack) { @@ -847,7 +853,7 @@ static int parse_sched_list(struct nlattr *list, return -ENOMEM; } - err = parse_sched_entry(n, entry, i, extack); + err = parse_sched_entry(q, n, entry, i, extack); if (err < 0) { kfree(entry); return err; @@ -862,7 +868,7 @@ static int parse_sched_list(struct nlattr *list, return i; } -static int parse_taprio_schedule(struct nlattr **tb, +static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, struct sched_gate_list *new, struct netlink_ext_ack *extack) { @@ -883,8 +889,8 @@ static int parse_taprio_schedule(struct nlattr **tb, new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]); if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) - err = parse_sched_list( - tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack); + err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], + new, extack); if (err < 0) return err; @@ -1473,7 +1479,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto free_sched; } - err = parse_taprio_schedule(tb, new_admin, extack); + err = parse_taprio_schedule(q, tb, new_admin, extack); if (err < 0) goto free_sched; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 9e289c770574..7e59d8a18f3e 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -494,6 +494,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) out_err: /* Clean up any successful allocations */ sctp_auth_destroy_hmacs(ep->auth_hmacs); + ep->auth_hmacs = NULL; return -ENOMEM; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 836615f71a7d..53d0a4161df3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9220,13 +9220,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, static inline void sctp_copy_descendant(struct sock *sk_to, const struct sock *sk_from) { - int ancestor_size = sizeof(struct inet_sock) + - sizeof(struct sctp_sock) - - offsetof(struct sctp_sock, pd_lobby); - - if (sk_from->sk_family == PF_INET6) - ancestor_size += sizeof(struct ipv6_pinfo); + size_t ancestor_size = sizeof(struct inet_sock); + ancestor_size += sk_from->sk_prot->obj_size; + ancestor_size -= offsetof(struct sctp_sock, pd_lobby); __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } diff --git a/net/socket.c b/net/socket.c index 0c0144604f81..58cac2da5f66 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3638,9 +3638,11 @@ EXPORT_SYMBOL(kernel_getpeername); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { - if (sock->ops->sendpage) + if (sock->ops->sendpage) { + /* Warn in case the improper page to zero-copy send */ + WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send"); return sock->ops->sendpage(sock, page, offset, size, flags); - + } return sock_no_sendpage(sock, page, offset, size, flags); } EXPORT_SYMBOL(kernel_sendpage); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d5805fa1d066..c2752e2b9ce3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -228,7 +228,7 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) { struct bvec_iter bi = { - .bi_size = size, + .bi_size = size + seek, }; struct bio_vec bv; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 865f3e037425..23d868545362 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -404,7 +404,7 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); * @val: value passed unmodified to notifier function * @dev: port device * @info: notifier information data - * + * @extack: netlink extended ack * Call all network notifier blocks. */ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, diff --git a/net/tipc/group.c b/net/tipc/group.c index 588c2d2b0c69..b1fcd2ad5ecf 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -273,8 +273,8 @@ static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, return NULL; } -static void tipc_group_add_to_tree(struct tipc_group *grp, - struct tipc_member *m) +static int tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) { u64 nkey, key = (u64)m->node << 32 | m->port; struct rb_node **n, *parent = NULL; @@ -291,10 +291,11 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, else if (key > nkey) n = &(*n)->rb_right; else - return; + return -EEXIST; } rb_link_node(&m->tree_node, parent, n); rb_insert_color(&m->tree_node, &grp->members); + return 0; } static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, @@ -302,6 +303,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 instance, int state) { struct tipc_member *m; + int ret; m = kzalloc(sizeof(*m), GFP_ATOMIC); if (!m) @@ -314,8 +316,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->port = port; m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; + ret = tipc_group_add_to_tree(grp, m); + if (ret < 0) { + kfree(m); + return NULL; + } grp->member_cnt++; - tipc_group_add_to_tree(grp, m); tipc_nlist_add(&grp->dests, m->node); m->state = state; return m; diff --git a/net/tipc/link.c b/net/tipc/link.c index b7362556da95..cef38a910107 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -532,7 +532,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, * tipc_link_bc_create - create new link to be used for broadcast * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers - * @window: send window to be used + * @min_win: minimal send window to be used by link + * @max_win: maximal send window to be used by link * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 848fae674532..52e93ba4d8e2 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + frag = skb_unshare(frag, GFP_ATOMIC); + if (unlikely(!frag)) goto err; head = *headbuf = frag; *buf = NULL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ebd280e767bd..11b27ddc75ba 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2771,10 +2771,7 @@ static int tipc_shutdown(struct socket *sock, int how) trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); - if (tipc_sk_type_connectionless(sk)) - sk->sk_shutdown = SHUTDOWN_MASK; - else - sk->sk_shutdown = SEND_SHUTDOWN; + sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9a3d9fedd7aa..95ab5545a931 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2143,10 +2143,15 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; + int pending; /* Wait for any pending async encryptions to complete */ - smp_store_mb(ctx->async_notify, true); - if (atomic_read(&ctx->encrypt_pending)) + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + + if (pending) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); tls_tx_records(sk, -1); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index faf74850a1b5..27026f587fa6 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -217,6 +217,7 @@ config LIB80211_CRYPT_WEP config LIB80211_CRYPT_CCMP tristate + select CRYPTO select CRYPTO_AES select CRYPTO_CCM diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2c9e9a2d1688..7fd45f6ddb05 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4172,6 +4172,9 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (key.idx < 0) + return -EINVAL; + if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); diff --git a/net/wireless/util.c b/net/wireless/util.c index 4a9ff9ef513f..6fa99df52f86 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -95,7 +95,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) /* see 802.11ax D6.1 27.3.23.2 */ if (chan == 2) return MHZ_TO_KHZ(5935); - if (chan <= 253) + if (chan <= 233) return MHZ_TO_KHZ(5950 + chan * 5); break; case NL80211_BAND_60GHZ: diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index e97db37354e4..b010bfde0149 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -303,10 +303,10 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { + u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; - u32 chunk_size = mr->chunk_size, headroom = mr->headroom; u64 npgs, addr = mr->addr, size = mr->len; - unsigned int chunks, chunks_per_page; + unsigned int chunks, chunks_rem; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { @@ -336,19 +336,18 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if ((addr + size) < addr) return -EINVAL; - npgs = size >> PAGE_SHIFT; + npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); + if (npgs_rem) + npgs++; if (npgs > U32_MAX) return -EINVAL; - chunks = (unsigned int)div_u64(size, chunk_size); + chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); if (chunks == 0) return -EINVAL; - if (!unaligned_chunks) { - chunks_per_page = PAGE_SIZE / chunk_size; - if (chunks < chunks_per_page || chunks % chunks_per_page) - return -EINVAL; - } + if (!unaligned_chunks && chunks_rem) + return -EINVAL; if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c3231620d210..6c5e09e7440a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -377,15 +377,30 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; + /* Hinder dev_direct_xmit from freeing the packet and + * therefore completing it in the destructor + */ + refcount_inc(&skb->users); err = dev_direct_xmit(skb, xs->queue_id); + if (err == NETDEV_TX_BUSY) { + /* Tell user-space to retry the send */ + skb->destructor = sock_wfree; + /* Free skb without triggering the perf drop trace */ + consume_skb(skb); + err = -EAGAIN; + goto out; + } + xskq_cons_release(xs->tx); /* Ignore NET_XMIT_CN as packet might have been sent */ - if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { + if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ + kfree_skb(skb); err = -EBUSY; goto out; } + consume_skb(skb); sent_frame = true; } diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 827ccdf2db57..1f08ebf7d80c 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -29,8 +29,12 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, static void handle_esp(struct sk_buff *skb, struct sock *sk) { + struct tcp_skb_cb *tcp_cb = (struct tcp_skb_cb *)skb->cb; + skb_reset_transport_header(skb); - memset(skb->cb, 0, sizeof(skb->cb)); + + /* restore IP CB, we need at least IP6CB->nhoff */ + memmove(skb->cb, &tcp_cb->header, sizeof(tcp_cb->header)); rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index eb8181987620..a8f66112c52b 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -303,7 +303,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } mtu = dst_mtu(dst); - if (!skb->ignore_df && skb->len > mtu) { + if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 69520ad3d83b..efc89a92961d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1019,7 +1019,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, */ if (x->km.state == XFRM_STATE_VALID) { if ((x->sel.family && - !xfrm_selector_match(&x->sel, fl, x->sel.family)) || + (x->sel.family != family || + !xfrm_selector_match(&x->sel, fl, family))) || !security_xfrm_state_pol_flow_match(x, pol, fl)) return; @@ -1032,7 +1033,9 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, *acq_in_progress = 1; } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, x->sel.family) && + if ((!x->sel.family || + (x->sel.family == family && + xfrm_selector_match(&x->sel, fl, family))) && security_xfrm_state_pol_flow_match(x, pol, fl)) *error = -ESRCH; } @@ -1072,7 +1075,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, + xfrm_state_look_at(pol, x, fl, family, &best, &acquire_in_progress, &error); } if (best || acquire_in_progress) @@ -1089,7 +1092,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, + xfrm_state_look_at(pol, x, fl, family, &best, &acquire_in_progress, &error); } @@ -1441,6 +1444,30 @@ out: EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE +static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security) +{ + struct xfrm_user_sec_ctx *uctx; + int size = sizeof(*uctx) + security->ctx_len; + int err; + + uctx = kmalloc(size, GFP_KERNEL); + if (!uctx) + return -ENOMEM; + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = size; + uctx->ctx_doi = security->ctx_doi; + uctx->ctx_alg = security->ctx_alg; + uctx->ctx_len = security->ctx_len; + memcpy(uctx + 1, security->ctx_str, security->ctx_len); + err = security_xfrm_state_alloc(x, uctx); + kfree(uctx); + if (err) + return err; + + return 0; +} + static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, struct xfrm_encap_tmpl *encap) { @@ -1497,6 +1524,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, goto error; } + if (orig->security) + if (clone_security(x, orig->security)) + goto error; + if (orig->coaddr) { x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr), GFP_KERNEL); @@ -1510,6 +1541,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, } memcpy(&x->mark, &orig->mark, sizeof(x->mark)); + memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark)); if (xfrm_init_state(x) < 0) goto error; @@ -1521,7 +1553,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->tfcpad = orig->tfcpad; x->replay_maxdiff = orig->replay_maxdiff; x->replay_maxage = orig->replay_maxage; - x->curlft.add_time = orig->curlft.add_time; + memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft)); x->km.state = orig->km.state; x->km.seq = orig->km.seq; x->replay = orig->replay; |