Diffstat (limited to 'net')
184 files changed, 2898 insertions, 1118 deletions
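Two tree-wide conversions dominate the hunks below: open-coded bounded randomness (`prandom_u32() % n` and the multiply-shift idiom) becomes `prandom_u32_max(n)`, and the `u64_stats_fetch_begin_irq()`/`u64_stats_fetch_retry_irq()` pair becomes plain `u64_stats_fetch_begin()`/`u64_stats_fetch_retry()`. As a reading aid, here is a minimal sketch of the random helper; the body is exactly the expression being deleted from the garp.c and mrp.c hunks, so those conversions are behavior-preserving (a value in [0, ep_ro) without a division). The exact kernel definition may differ in annotations:

/* Sketch of the helper the conversions below rely on; matches the
 * open-coded "(u64)x * prandom_u32() >> 32" idiom removed from
 * garp.c/mrp.c. Returns a value in the range [0, ep_ro).
 */
static inline u32 prandom_u32_max(u32 ep_ro)
{
	return (u32)(((u64)prandom_u32() * ep_ro) >> 32);
}

So, for example, `delay = prandom_u32_max(msecs_to_jiffies(garp_join_time));` picks a join delay in [0, join_time) jiffies, as before.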
diff --git a/net/802/garp.c b/net/802/garp.c index f6012f8e59f0..fc9eb02a912f 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -407,7 +407,7 @@ static void garp_join_timer_arm(struct garp_applicant *app) { unsigned long delay; - delay = (u64)msecs_to_jiffies(garp_join_time) * prandom_u32() >> 32; + delay = prandom_u32_max(msecs_to_jiffies(garp_join_time)); mod_timer(&app->join_timer, jiffies + delay); } diff --git a/net/802/mrp.c b/net/802/mrp.c index 35e04cc5390c..155f74d8b14f 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -592,7 +592,7 @@ static void mrp_join_timer_arm(struct mrp_applicant *app) { unsigned long delay; - delay = (u64)msecs_to_jiffies(mrp_join_time) * prandom_u32() >> 32; + delay = prandom_u32_max(msecs_to_jiffies(mrp_join_time)); mod_timer(&app->join_timer, jiffies + delay); } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e1bb41a443c4..296d0145932f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -712,13 +712,13 @@ static void vlan_dev_get_stats64(struct net_device *dev, p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); rxpackets = u64_stats_read(&p->rx_packets); rxbytes = u64_stats_read(&p->rx_bytes); rxmulticast = u64_stats_read(&p->rx_multicast); txpackets = u64_stats_read(&p->tx_packets); txbytes = u64_stats_read(&p->tx_bytes); - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); stats->rx_packets += rxpackets; stats->rx_bytes += rxbytes; diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 829db9eba0cb..aaf64b953915 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -219,11 +219,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff, if (!page) return -ENOMEM; - for (p = page, len = 0; len < nbytes; p++, len++) { + for (p = page, len = 0; len < nbytes; p++) { if (get_user(*p, buff++)) { free_page((unsigned long)page); return -EFAULT; } + len += 1; if (*p == '\0' || *p == '\n') break; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 09140bc8c15e..5e988f0ed2c0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4899,9 +4899,9 @@ void br_multicast_get_stats(const struct net_bridge *br, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries); mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 6e53dc991409..f2fc284abab3 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1378,12 +1378,12 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, cpu_stats = per_cpu_ptr(v->stats, i); do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); rxpackets = u64_stats_read(&cpu_stats->rx_packets); rxbytes = u64_stats_read(&cpu_stats->rx_bytes); txbytes = u64_stats_read(&cpu_stats->tx_bytes); txpackets = u64_stats_read(&cpu_stats->tx_packets); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->rx_packets, rxpackets); u64_stats_add(&stats->rx_bytes, 
rxbytes); diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index d7d86c944d76..f26f4cfa9e63 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -342,10 +342,12 @@ static void j1939_session_skb_drop_old(struct j1939_session *session) __skb_unlink(do_skb, &session->skb_queue); /* drop ref taken in j1939_session_skb_queue() */ skb_unref(do_skb); + spin_unlock_irqrestore(&session->skb_queue.lock, flags); kfree_skb(do_skb); + } else { + spin_unlock_irqrestore(&session->skb_queue.lock, flags); } - spin_unlock_irqrestore(&session->skb_queue.lock, flags); } void j1939_session_skb_queue(struct j1939_session *session, @@ -985,7 +987,7 @@ static int j1939_session_tx_eoma(struct j1939_session *session) /* wait for the EOMA packet to come in */ j1939_tp_set_rxtimeout(session, 1250); - netdev_dbg(session->priv->ndev, "%p: 0x%p\n", __func__, session); + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); return 0; } diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 6a6898ee4049..db60217f911b 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -222,7 +222,7 @@ static void pick_new_mon(struct ceph_mon_client *monc) max--; } - n = prandom_u32() % max; + n = prandom_u32_max(max); if (o >= 0 && n >= o) n++; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 87b883c7bfd6..4e4f1e4bc265 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1479,7 +1479,7 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc, static int pick_random_replica(const struct ceph_osds *acting) { - int i = prandom_u32() % acting->size; + int i = prandom_u32_max(acting->size); dout("%s picked osd%d, primary osd%d\n", __func__, acting->osds[i], acting->primary); diff --git a/net/core/dev.c b/net/core/dev.c index fa53830d0683..2e4f1c97b59e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1333,7 +1333,7 @@ void netdev_state_change(struct net_device *dev) call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL); } } EXPORT_SYMBOL(netdev_state_change); @@ -1469,7 +1469,7 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) if (ret < 0) return ret; - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL); call_netdevice_notifiers(NETDEV_UP, dev); return ret; @@ -1541,7 +1541,7 @@ void dev_close_many(struct list_head *head, bool unlink) __dev_close_many(head); list_for_each_entry_safe(dev, tmp, head, close_list) { - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL); call_netdevice_notifiers(NETDEV_DOWN, dev); if (unlink) list_del_init(&dev->close_list); @@ -5136,11 +5136,13 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, case TC_ACT_SHOT: mini_qdisc_qstats_cpu_drop(miniq); kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); + *ret = NET_RX_DROP; return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: consume_skb(skb); + *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_REDIRECT: /* skb_mac_header check was done by cls/act_bpf, so @@ -5153,8 +5155,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, *another = true; break; } + *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_CONSUMED: + *ret = NET_RX_SUCCESS; return NULL; default: break; @@ 
-8347,7 +8351,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) dev_change_rx_flags(dev, IFF_PROMISC); } if (notify) - __dev_notify_flags(dev, old_flags, IFF_PROMISC); + __dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL); return 0; } @@ -8402,7 +8406,7 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) dev_set_rx_mode(dev); if (notify) __dev_notify_flags(dev, old_flags, - dev->gflags ^ old_gflags); + dev->gflags ^ old_gflags, 0, NULL); } return 0; } @@ -8565,12 +8569,13 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, } void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, - unsigned int gchanges) + unsigned int gchanges, u32 portid, + const struct nlmsghdr *nlh) { unsigned int changes = dev->flags ^ old_flags; if (gchanges) - rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC); + rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC, portid, nlh); if (changes & IFF_UP) { if (dev->flags & IFF_UP) @@ -8612,7 +8617,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, return ret; changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); - __dev_notify_flags(dev, old_flags, changes); + __dev_notify_flags(dev, old_flags, changes, 0, NULL); return ret; } EXPORT_SYMBOL(dev_change_flags); @@ -8818,7 +8823,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data); + size_t size = sizeof(sa->sa_data_min); struct net_device *dev; int ret = 0; @@ -10097,7 +10102,7 @@ int register_netdevice(struct net_device *dev) */ if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL); out: return ret; @@ -10473,12 +10478,12 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, stats = per_cpu_ptr(netstats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); rx_packets = u64_stats_read(&stats->rx_packets); rx_bytes = u64_stats_read(&stats->rx_bytes); tx_packets = u64_stats_read(&stats->tx_packets); tx_bytes = u64_stats_read(&stats->tx_bytes); - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); s->rx_packets += rx_packets; s->rx_bytes += rx_bytes; @@ -10776,14 +10781,8 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL(unregister_netdevice_queue); -/** - * unregister_netdevice_many - unregister many devices - * @head: list of devices - * - * Note: As most callers use a stack allocated list_head, - * we force a list_del() to make sure stack wont be corrupted later. 
- */ -void unregister_netdevice_many(struct list_head *head) +void unregister_netdevice_many_notify(struct list_head *head, + u32 portid, const struct nlmsghdr *nlh) { struct net_device *dev, *tmp; LIST_HEAD(close_head); @@ -10845,7 +10844,8 @@ void unregister_netdevice_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL, NULL, 0); + GFP_KERNEL, NULL, 0, + portid, nlmsg_seq(nlh)); /* * Flush the unicast and multicast chains @@ -10860,7 +10860,7 @@ void unregister_netdevice_many(struct list_head *head) dev->netdev_ops->ndo_uninit(dev); if (skb) - rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); @@ -10883,6 +10883,18 @@ void unregister_netdevice_many(struct list_head *head) list_del(head); } + +/** + * unregister_netdevice_many - unregister many devices + * @head: list of devices + * + * Note: As most callers use a stack allocated list_head, + * we force a list_del() to make sure stack wont be corrupted later. + */ +void unregister_netdevice_many(struct list_head *head) +{ + unregister_netdevice_many_notify(head, 0, NULL); +} EXPORT_SYMBOL(unregister_netdevice_many); /** @@ -11038,7 +11050,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. */ - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL); synchronize_net(); err = 0; diff --git a/net/core/dev.h b/net/core/dev.h index cbb8a925175a..814ed5b7b960 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -88,6 +88,13 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier); void __dev_set_rx_mode(struct net_device *dev); +void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, + unsigned int gchanges, u32 portid, + const struct nlmsghdr *nlh); + +void unregister_netdevice_many_notify(struct list_head *head, + u32 portid, const struct nlmsghdr *nlh); + static inline void netif_set_gso_max_size(struct net_device *dev, unsigned int size) { diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7674bb9f3076..5cdbfbf9a7dc 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -342,7 +342,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data), + min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/core/devlink.c b/net/core/devlink.c index 89baa7c0938b..0a16ad45520e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8304,10 +8304,10 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, cpu_stats = per_cpu_ptr(trap_stats, i); do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); rx_packets = u64_stats_read(&cpu_stats->rx_packets); rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->rx_packets, rx_packets); u64_stats_add(&stats->rx_bytes, rx_bytes); diff --git 
a/net/core/drop_monitor.c b/net/core/drop_monitor.c index f084a4a6b7ab..11aa6e8a3098 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1432,9 +1432,9 @@ static void net_dm_stats_read(struct net_dm_stats *stats) u64 dropped; do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); dropped = u64_stats_read(&cpu_stats->dropped); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->dropped, dropped); } @@ -1476,9 +1476,9 @@ static void net_dm_hw_stats_read(struct net_dm_stats *stats) u64 dropped; do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); dropped = u64_stats_read(&cpu_stats->dropped); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->dropped, dropped); } diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index c8d137ef5980..b71ccaec0991 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -135,10 +135,10 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, u64 bytes, packets; do { - start = u64_stats_fetch_begin_irq(&bcpu->syncp); + start = u64_stats_fetch_begin(&bcpu->syncp); bytes = u64_stats_read(&bcpu->bytes); packets = u64_stats_read(&bcpu->packets); - } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); + } while (u64_stats_fetch_retry(&bcpu->syncp, start)); t_bytes += bytes; t_packets += packets; @@ -162,10 +162,10 @@ void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, } do { if (running) - start = u64_stats_fetch_begin_irq(&b->syncp); + start = u64_stats_fetch_begin(&b->syncp); bytes = u64_stats_read(&b->bytes); packets = u64_stats_read(&b->packets); - } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); + } while (running && u64_stats_fetch_retry(&b->syncp, start)); _bstats_update(bstats, bytes, packets); } @@ -187,10 +187,10 @@ static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, u64 bytes, packets; do { - start = u64_stats_fetch_begin_irq(&bcpu->syncp); + start = u64_stats_fetch_begin(&bcpu->syncp); bytes = u64_stats_read(&bcpu->bytes); packets = u64_stats_read(&bcpu->packets); - } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); + } while (u64_stats_fetch_retry(&bcpu->syncp, start)); t_bytes += bytes; t_packets += packets; @@ -201,10 +201,10 @@ static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, } do { if (running) - start = u64_stats_fetch_begin_irq(&b->syncp); + start = u64_stats_fetch_begin(&b->syncp); *ret_bytes = u64_stats_read(&b->bytes); *ret_packets = u64_stats_read(&b->packets); - } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); + } while (running && u64_stats_fetch_retry(&b->syncp, start)); } static int diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e93edb810103..3c4786b99907 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -111,7 +111,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) unsigned long neigh_rand_reach_time(unsigned long base) { - return base ? (prandom_u32() % base) + (base >> 1) : 0; + return base ? 
prandom_u32_max(base) + (base >> 1) : 0; } EXPORT_SYMBOL(neigh_rand_reach_time); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0ec2f5906a27..5581d22cc191 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -117,6 +117,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) static int ops_init(const struct pernet_operations *ops, struct net *net) { + struct net_generic *ng; int err = -ENOMEM; void *data = NULL; @@ -135,7 +136,13 @@ static int ops_init(const struct pernet_operations *ops, struct net *net) if (!err) return 0; + if (ops->id && ops->size) { cleanup: + ng = rcu_dereference_protected(net->gen, + lockdep_is_held(&pernet_ops_rwsem)); + ng->ptr[*ops->id] = NULL; + } + kfree(data); out: @@ -309,6 +316,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128); + ref_tracker_dir_init(&net->notrefcnt_tracker, 128); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); @@ -429,6 +437,10 @@ static void net_free(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); + + /* There should not be any trackers left there. */ + ref_tracker_dir_exit(&net->notrefcnt_tracker); + kmem_cache_free(net_cachep, net); } } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 88906ba6d9a7..c3763056c554 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2324,7 +2324,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) pkt_dev->curfl = 0; /*reset */ } } else { - flow = prandom_u32() % pkt_dev->cflows; + flow = prandom_u32_max(pkt_dev->cflows); pkt_dev->curfl = flow; if (pkt_dev->flows[flow].count > pkt_dev->lflow) { @@ -2380,10 +2380,9 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev) else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) { __u16 t; if (pkt_dev->flags & F_QUEUE_MAP_RND) { - t = prandom_u32() % - (pkt_dev->queue_map_max - - pkt_dev->queue_map_min + 1) - + pkt_dev->queue_map_min; + t = prandom_u32_max(pkt_dev->queue_map_max - + pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; } else { t = pkt_dev->cur_queue_map + 1; if (t > pkt_dev->queue_map_max) @@ -2412,7 +2411,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 tmp; if (pkt_dev->flags & F_MACSRC_RND) - mc = prandom_u32() % pkt_dev->src_mac_count; + mc = prandom_u32_max(pkt_dev->src_mac_count); else { mc = pkt_dev->cur_src_mac_offset++; if (pkt_dev->cur_src_mac_offset >= @@ -2438,7 +2437,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 tmp; if (pkt_dev->flags & F_MACDST_RND) - mc = prandom_u32() % pkt_dev->dst_mac_count; + mc = prandom_u32_max(pkt_dev->dst_mac_count); else { mc = pkt_dev->cur_dst_mac_offset++; @@ -2465,23 +2464,23 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | - ((__force __be32)prandom_u32() & + ((__force __be32)get_random_u32() & htonl(0x000fffff)); } if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) { - pkt_dev->vlan_id = prandom_u32() & (4096 - 1); + pkt_dev->vlan_id = prandom_u32_max(4096); } if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) { - pkt_dev->svlan_id = prandom_u32() & (4096 - 1); + pkt_dev->svlan_id = prandom_u32_max(4096); } if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { if (pkt_dev->flags & F_UDPSRC_RND) - 
pkt_dev->cur_udp_src = prandom_u32() % - (pkt_dev->udp_src_max - pkt_dev->udp_src_min) - + pkt_dev->udp_src_min; + pkt_dev->cur_udp_src = prandom_u32_max( + pkt_dev->udp_src_max - pkt_dev->udp_src_min) + + pkt_dev->udp_src_min; else { pkt_dev->cur_udp_src++; @@ -2492,9 +2491,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) { if (pkt_dev->flags & F_UDPDST_RND) { - pkt_dev->cur_udp_dst = prandom_u32() % - (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min) - + pkt_dev->udp_dst_min; + pkt_dev->cur_udp_dst = prandom_u32_max( + pkt_dev->udp_dst_max - pkt_dev->udp_dst_min) + + pkt_dev->udp_dst_min; } else { pkt_dev->cur_udp_dst++; if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max) @@ -2509,7 +2508,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (imn < imx) { __u32 t; if (pkt_dev->flags & F_IPSRC_RND) - t = prandom_u32() % (imx - imn) + imn; + t = prandom_u32_max(imx - imn) + imn; else { t = ntohl(pkt_dev->cur_saddr); t++; @@ -2531,8 +2530,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->flags & F_IPDST_RND) { do { - t = prandom_u32() % - (imx - imn) + imn; + t = prandom_u32_max(imx - imn) + + imn; s = htonl(t); } while (ipv4_is_loopback(s) || ipv4_is_multicast(s) || @@ -2569,7 +2568,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < 4; i++) { pkt_dev->cur_in6_daddr.s6_addr32[i] = - (((__force __be32)prandom_u32() | + (((__force __be32)get_random_u32() | pkt_dev->min_in6_daddr.s6_addr32[i]) & pkt_dev->max_in6_daddr.s6_addr32[i]); } @@ -2579,9 +2578,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { __u32 t; if (pkt_dev->flags & F_TXSIZE_RND) { - t = prandom_u32() % - (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size) - + pkt_dev->min_pkt_size; + t = prandom_u32_max(pkt_dev->max_pkt_size - + pkt_dev->min_pkt_size) + + pkt_dev->min_pkt_size; } else { t = pkt_dev->cur_pkt_size + 1; if (t > pkt_dev->max_pkt_size) @@ -2590,7 +2589,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_pkt_size = t; } else if (pkt_dev->n_imix_entries > 0) { struct imix_pkt *entry; - __u32 t = prandom_u32() % IMIX_PRECISION; + __u32 t = prandom_u32_max(IMIX_PRECISION); __u8 entry_index = pkt_dev->imix_distribution[t]; entry = &pkt_dev->imix_entries[entry_index]; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 74864dc46a7e..d2f27548fc0b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -760,7 +760,7 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) EXPORT_SYMBOL(rtnl_unicast); void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, - struct nlmsghdr *nlh, gfp_t flags) + const struct nlmsghdr *nlh, gfp_t flags) { struct sock *rtnl = net->rtnl; @@ -3110,7 +3110,7 @@ static int rtnl_group_dellink(const struct net *net, int group) return 0; } -int rtnl_delete_link(struct net_device *dev) +int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *nlh) { const struct rtnl_link_ops *ops; LIST_HEAD(list_kill); @@ -3120,7 +3120,7 @@ int rtnl_delete_link(struct net_device *dev) return -EOPNOTSUPP; ops->dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); + unregister_netdevice_many_notify(&list_kill, portid, nlh); return 0; } @@ -3130,6 +3130,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + u32 portid = NETLINK_CB(skb).portid; struct net 
*tgt_net = net; struct net_device *dev = NULL; struct ifinfomsg *ifm; @@ -3171,7 +3172,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = rtnl_delete_link(dev); + err = rtnl_delete_link(dev, portid, nlh); out: if (netnsid >= 0) @@ -3180,7 +3181,8 @@ out: return err; } -int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) +int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, + u32 portid, const struct nlmsghdr *nlh) { unsigned int old_flags; int err; @@ -3194,10 +3196,10 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) } if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { - __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags)); + __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags), portid, nlh); } else { dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - __dev_notify_flags(dev, old_flags, ~0U); + __dev_notify_flags(dev, old_flags, ~0U, portid, nlh); } return 0; } @@ -3311,11 +3313,13 @@ static int rtnl_group_changelink(const struct sk_buff *skb, static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, const struct rtnl_link_ops *ops, + const struct nlmsghdr *nlh, struct nlattr **tb, struct nlattr **data, struct netlink_ext_ack *extack) { unsigned char name_assign_type = NET_NAME_USER; struct net *net = sock_net(skb->sk); + u32 portid = NETLINK_CB(skb).portid; struct net *dest_net, *link_net; struct net_device *dev; char ifname[IFNAMSIZ]; @@ -3369,7 +3373,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out; } - err = rtnl_configure_link(dev, ifm); + err = rtnl_configure_link(dev, ifm, portid, nlh); if (err < 0) goto out_unregister; if (link_net) { @@ -3578,7 +3582,7 @@ replay: return -EOPNOTSUPP; } - return rtnl_newlink_create(skb, ifm, ops, tb, data, extack); + return rtnl_newlink_create(skb, ifm, ops, nlh, tb, data, extack); } static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3896,7 +3900,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, u32 event, gfp_t flags, int *new_nsid, - int new_ifindex) + int new_ifindex, u32 portid, u32 seq) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -3907,7 +3911,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, goto errout; err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), - type, 0, 0, change, 0, 0, event, + type, portid, seq, change, 0, 0, event, new_nsid, new_ifindex, -1, flags); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ @@ -3922,16 +3926,18 @@ errout: return NULL; } -void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) +void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags, + u32 portid, const struct nlmsghdr *nlh) { struct net *net = dev_net(dev); - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); + rtnl_notify(skb, net, portid, RTNLGRP_LINK, nlh, flags); } static void rtmsg_ifinfo_event(int type, struct net_device *dev, unsigned int change, u32 event, - gfp_t flags, int *new_nsid, int new_ifindex) + gfp_t flags, int *new_nsid, int new_ifindex, + u32 portid, const struct nlmsghdr *nlh) { struct sk_buff *skb; @@ -3939,23 +3945,23 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, return; skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid, - new_ifindex); + new_ifindex, portid, 
nlmsg_seq(nlh)); if (skb) - rtmsg_ifinfo_send(skb, dev, flags); + rtmsg_ifinfo_send(skb, dev, flags, portid, nlh); } void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, - gfp_t flags) + gfp_t flags, u32 portid, const struct nlmsghdr *nlh) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, - NULL, 0); + NULL, 0, portid, nlh); } void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, - new_nsid, new_ifindex); + new_nsid, new_ifindex, 0, NULL); } static int nlmsg_populate_fdb_fill(struct sk_buff *skb, @@ -6140,7 +6146,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGELOWERSTATE: case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), - GFP_KERNEL, NULL, 0); + GFP_KERNEL, NULL, 0, 0, NULL); break; default: break; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1d9719e72f9d..42a35b59fb1e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -94,6 +94,7 @@ EXPORT_SYMBOL(sysctl_max_skb_frags); #undef FN #define FN(reason) [SKB_DROP_REASON_##reason] = #reason, const char * const drop_reasons[] = { + [SKB_CONSUMED] = "CONSUMED", DEFINE_DROP_REASON(FN, FN) }; EXPORT_SYMBOL(drop_reasons); @@ -506,14 +507,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, */ size = SKB_DATA_ALIGN(size); size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); + osize = kmalloc_size_roundup(size); + data = kmalloc_reserve(osize, gfp_mask, node, &pfmemalloc); if (unlikely(!data)) goto nodata; - /* kmalloc(size) might give us more room than requested. + /* kmalloc_size_roundup() might give us more room than requested. * Put skb_shared_info exactly at the end of allocated zone, * to allow max possible filling before reallocation. */ - osize = ksize(data); size = SKB_WITH_OVERHEAD(osize); prefetchw(data + size); @@ -748,6 +749,13 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } +static bool skb_pp_recycle(struct sk_buff *skb, void *data) +{ + if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) + return false; + return page_pool_return_skb_page(virt_to_page(data)); +} + static void skb_free_head(struct sk_buff *skb) { unsigned char *head = skb->head; @@ -761,7 +769,7 @@ static void skb_free_head(struct sk_buff *skb) } } -static void skb_release_data(struct sk_buff *skb) +static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason) { struct skb_shared_info *shinfo = skb_shinfo(skb); int i; @@ -784,7 +792,7 @@ static void skb_release_data(struct sk_buff *skb) free_head: if (shinfo->frag_list) - kfree_skb_list(shinfo->frag_list); + kfree_skb_list_reason(shinfo->frag_list, reason); skb_free_head(skb); exit: @@ -847,11 +855,11 @@ void skb_release_head_state(struct sk_buff *skb) } /* Free everything but the sk_buff shell. 
*/ -static void skb_release_all(struct sk_buff *skb) +static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason) { skb_release_head_state(skb); if (likely(skb->head)) - skb_release_data(skb); + skb_release_data(skb, reason); } /** @@ -865,7 +873,7 @@ static void skb_release_all(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - skb_release_all(skb); + skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED); kfree_skbmem(skb); } EXPORT_SYMBOL(__kfree_skb); @@ -887,7 +895,10 @@ kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX); - trace_kfree_skb(skb, __builtin_return_address(0), reason); + if (reason == SKB_CONSUMED) + trace_consume_skb(skb); + else + trace_kfree_skb(skb, __builtin_return_address(0), reason); __kfree_skb(skb); } EXPORT_SYMBOL(kfree_skb_reason); @@ -1045,7 +1056,7 @@ EXPORT_SYMBOL(consume_skb); void __consume_stateless_skb(struct sk_buff *skb) { trace_consume_skb(skb); - skb_release_data(skb); + skb_release_data(skb, SKB_CONSUMED); kfree_skbmem(skb); } @@ -1070,7 +1081,7 @@ static void napi_skb_cache_put(struct sk_buff *skb) void __kfree_skb_defer(struct sk_buff *skb) { - skb_release_all(skb); + skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED); napi_skb_cache_put(skb); } @@ -1108,7 +1119,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - skb_release_all(skb); + skb_release_all(skb, SKB_CONSUMED); napi_skb_cache_put(skb); } EXPORT_SYMBOL(napi_consume_skb); @@ -1239,7 +1250,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg); */ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) { - skb_release_all(dst); + skb_release_all(dst, SKB_CONSUMED); return __skb_clone(dst, src); } EXPORT_SYMBOL_GPL(skb_morph); @@ -1814,10 +1825,11 @@ EXPORT_SYMBOL(__pskb_copy_fclone); int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask) { - int i, osize = skb_end_offset(skb); - int size = osize + nhead + ntail; + unsigned int osize = skb_end_offset(skb); + unsigned int size = osize + nhead + ntail; long off; u8 *data; + int i; BUG_ON(nhead < 0); @@ -1825,15 +1837,16 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_zcopy_downgrade_managed(skb); - size = SKB_DATA_ALIGN(size); - if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; - data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask, NUMA_NO_NODE, NULL); + + size = SKB_DATA_ALIGN(size); + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + size = kmalloc_size_roundup(size); + data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; - size = SKB_WITH_OVERHEAD(ksize(data)); + size = SKB_WITH_OVERHEAD(size); /* Copy only real data... and, alas, header. This should be * optimized for the cases when header is void. 
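The skbuff.c hunks above thread an `enum skb_drop_reason` down through `skb_release_all()`/`skb_release_data()`, and route `SKB_CONSUMED` to `trace_consume_skb()` instead of the drop tracepoint. A hedged caller-side sketch of the resulting consume-vs-drop split: `kfree_skb_reason()` and `consume_skb()` are the real APIs (both visible in this diff), while `demo_skb_done()`, its `csum_bad` flag, and the particular drop reason are hypothetical stand-ins:

/* Hypothetical caller illustrating the consume-vs-drop split the
 * hunks above refine.
 */
static void demo_skb_done(struct sk_buff *skb, bool csum_bad)
{
	if (csum_bad) {
		/* A real drop: trace_kfree_skb() fires with the reason. */
		kfree_skb_reason(skb, SKB_DROP_REASON_IP_CSUM);
		return;
	}
	/* Normal end of life: trace_consume_skb() fires, not a drop
	 * event; the hunks above thread SKB_CONSUMED through
	 * napi_consume_skb() and friends for the same effect on
	 * attached frag lists.
	 */
	consume_skb(skb);
}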
@@ -1860,7 +1873,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); - skb_release_data(skb); + skb_release_data(skb, SKB_CONSUMED); } else { skb_free_head(skb); } @@ -3971,7 +3984,7 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, } else if (i < MAX_SKB_FRAGS) { skb_zcopy_downgrade_managed(skb); get_page(page); - skb_fill_page_desc(skb, i, page, offset, size); + skb_fill_page_desc_noacc(skb, i, page, offset, size); } else { return -EMSGSIZE; } @@ -6167,21 +6180,20 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, const int headlen, gfp_t gfp_mask) { int i; - int size = skb_end_offset(skb); + unsigned int size = skb_end_offset(skb); int new_hlen = headlen - off; u8 *data; - size = SKB_DATA_ALIGN(size); - if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; - data = kmalloc_reserve(size + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask, NUMA_NO_NODE, NULL); + + size = SKB_DATA_ALIGN(size); + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + size = kmalloc_size_roundup(size); + data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; - - size = SKB_WITH_OVERHEAD(ksize(data)); + size = SKB_WITH_OVERHEAD(size); /* Copy real data, and all frags */ skb_copy_from_linear_data_offset(skb, off, data, new_hlen); @@ -6201,7 +6213,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); - skb_release_data(skb); + skb_release_data(skb, SKB_CONSUMED); } else { /* we can reuse existing recount- all we did was * relocate values @@ -6286,22 +6298,21 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, int pos, gfp_t gfp_mask) { int i, k = 0; - int size = skb_end_offset(skb); + unsigned int size = skb_end_offset(skb); u8 *data; const int nfrags = skb_shinfo(skb)->nr_frags; struct skb_shared_info *shinfo; - size = SKB_DATA_ALIGN(size); - if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; - data = kmalloc_reserve(size + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask, NUMA_NO_NODE, NULL); + + size = SKB_DATA_ALIGN(size); + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + size = kmalloc_size_roundup(size); + data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; - - size = SKB_WITH_OVERHEAD(ksize(data)); + size = SKB_WITH_OVERHEAD(size); memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); @@ -6345,7 +6356,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, kfree(data); return -ENOMEM; } - skb_release_data(skb); + skb_release_data(skb, SKB_CONSUMED); skb->head = data; skb->head_frag = 0; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ca70525621c7..1efdc47a999b 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -500,11 +500,11 @@ bool sk_msg_is_readable(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_msg_is_readable); -static struct sk_msg *alloc_sk_msg(void) +static struct sk_msg *alloc_sk_msg(gfp_t gfp) { struct sk_msg *msg; - msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); + msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN); if (unlikely(!msg)) return NULL; sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); @@ -520,7 +520,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, if (!sk_rmem_schedule(sk, skb, skb->truesize)) return NULL; - return alloc_sk_msg(); + return 
alloc_sk_msg(GFP_KERNEL); } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, @@ -597,7 +597,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) { - struct sk_msg *msg = alloc_sk_msg(); + struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC); struct sock *sk = psock->sk; int err; diff --git a/net/core/sock.c b/net/core/sock.c index a3ba0358c77c..4571914a4aa8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1436,7 +1436,7 @@ set_sndbuf: break; } case SO_INCOMING_CPU: - WRITE_ONCE(sk->sk_incoming_cpu, val); + reuseport_update_incoming_cpu(sk, val); break; case SO_CNX_ADVICE: @@ -2094,6 +2094,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) { get_net_track(net, &sk->ns_tracker, priority); sock_inuse_add(net, 1); + } else { + __netns_tracker_alloc(net, &sk->ns_tracker, + false, priority); } sock_net_set(sk, net); @@ -2149,6 +2152,9 @@ static void __sk_destruct(struct rcu_head *head) if (likely(sk->sk_net_refcnt)) put_net_track(sock_net(sk), &sk->ns_tracker); + else + __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); + sk_prot_free(sk->sk_prot_creator, sk); } @@ -2237,6 +2243,14 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) if (likely(newsk->sk_net_refcnt)) { get_net_track(sock_net(newsk), &newsk->ns_tracker, priority); sock_inuse_add(sock_net(newsk), 1); + } else { + /* Kernel sockets are not elevating the struct net refcount. + * Instead, use a tracker to more easily detect if a layer + * is not properly dismantling its kernel sockets at netns + * destroy time. + */ + __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, + false, priority); } sk_node_init(&newsk->sk_node); sock_lock_init(newsk); @@ -2730,7 +2744,7 @@ failure: } EXPORT_SYMBOL(sock_alloc_send_pskb); -int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, +int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, struct sockcm_cookie *sockc) { u32 tsflags; @@ -2784,7 +2798,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, return -EINVAL; if (cmsg->cmsg_level != SOL_SOCKET) continue; - ret = __sock_cmsg_send(sk, msg, cmsg, sockc); + ret = __sock_cmsg_send(sk, cmsg, sockc); if (ret) return ret; } diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 5daa1fa54249..5a165286e4d8 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -21,6 +21,86 @@ static DEFINE_IDA(reuseport_ida); static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse, struct sock_reuseport *reuse, bool bind_inany); +void reuseport_has_conns_set(struct sock *sk) +{ + struct sock_reuseport *reuse; + + if (!rcu_access_pointer(sk->sk_reuseport_cb)) + return; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + if (likely(reuse)) + reuse->has_conns = 1; + spin_unlock_bh(&reuseport_lock); +} +EXPORT_SYMBOL(reuseport_has_conns_set); + +static void __reuseport_get_incoming_cpu(struct sock_reuseport *reuse) +{ + /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */ + WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu + 1); +} + +static void __reuseport_put_incoming_cpu(struct sock_reuseport *reuse) +{ + /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). 
*/ + WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu - 1); +} + +static void reuseport_get_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse) +{ + if (sk->sk_incoming_cpu >= 0) + __reuseport_get_incoming_cpu(reuse); +} + +static void reuseport_put_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse) +{ + if (sk->sk_incoming_cpu >= 0) + __reuseport_put_incoming_cpu(reuse); +} + +void reuseport_update_incoming_cpu(struct sock *sk, int val) +{ + struct sock_reuseport *reuse; + int old_sk_incoming_cpu; + + if (unlikely(!rcu_access_pointer(sk->sk_reuseport_cb))) { + /* Paired with READ_ONCE() in sk_incoming_cpu_update() + * and compute_score(). + */ + WRITE_ONCE(sk->sk_incoming_cpu, val); + return; + } + + spin_lock_bh(&reuseport_lock); + + /* This must be done under reuseport_lock to avoid a race with + * reuseport_grow(), which accesses sk->sk_incoming_cpu without + * lock_sock() when detaching a shutdown()ed sk. + * + * Paired with READ_ONCE() in reuseport_select_sock_by_hash(). + */ + old_sk_incoming_cpu = sk->sk_incoming_cpu; + WRITE_ONCE(sk->sk_incoming_cpu, val); + + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + + /* reuseport_grow() has detached a closed sk. */ + if (!reuse) + goto out; + + if (old_sk_incoming_cpu < 0 && val >= 0) + __reuseport_get_incoming_cpu(reuse); + else if (old_sk_incoming_cpu >= 0 && val < 0) + __reuseport_put_incoming_cpu(reuse); + +out: + spin_unlock_bh(&reuseport_lock); +} + static int reuseport_sock_index(struct sock *sk, const struct sock_reuseport *reuse, bool closed) @@ -48,6 +128,7 @@ static void __reuseport_add_sock(struct sock *sk, /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */ smp_wmb(); reuse->num_socks++; + reuseport_get_incoming_cpu(sk, reuse); } static bool __reuseport_detach_sock(struct sock *sk, @@ -60,6 +141,7 @@ static bool __reuseport_detach_sock(struct sock *sk, reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; reuse->num_socks--; + reuseport_put_incoming_cpu(sk, reuse); return true; } @@ -70,6 +152,7 @@ static void __reuseport_add_closed_sock(struct sock *sk, reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk; /* paired with READ_ONCE() in inet_csk_bind_conflict() */ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1); + reuseport_get_incoming_cpu(sk, reuse); } static bool __reuseport_detach_closed_sock(struct sock *sk, @@ -83,6 +166,7 @@ static bool __reuseport_detach_closed_sock(struct sock *sk, reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks]; /* paired with READ_ONCE() in inet_csk_bind_conflict() */ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1); + reuseport_put_incoming_cpu(sk, reuse); return true; } @@ -150,6 +234,7 @@ int reuseport_alloc(struct sock *sk, bool bind_inany) reuse->bind_inany = bind_inany; reuse->socks[0] = sk; reuse->num_socks = 1; + reuseport_get_incoming_cpu(sk, reuse); rcu_assign_pointer(sk->sk_reuseport_cb, reuse); out: @@ -193,6 +278,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->reuseport_id = reuse->reuseport_id; more_reuse->bind_inany = reuse->bind_inany; more_reuse->has_conns = reuse->has_conns; + more_reuse->incoming_cpu = reuse->incoming_cpu; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); @@ -442,18 +528,32 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks, static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse, u32
hash, u16 num_socks) { + struct sock *first_valid_sk = NULL; int i, j; i = j = reciprocal_scale(hash, num_socks); - while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { + do { + struct sock *sk = reuse->socks[i]; + + if (sk->sk_state != TCP_ESTABLISHED) { + /* Paired with WRITE_ONCE() in __reuseport_(get|put)_incoming_cpu(). */ + if (!READ_ONCE(reuse->incoming_cpu)) + return sk; + + /* Paired with WRITE_ONCE() in reuseport_update_incoming_cpu(). */ + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) + return sk; + + if (!first_valid_sk) + first_valid_sk = sk; + } + i++; if (i >= num_socks) i = 0; - if (i == j) - return NULL; - } + } while (i != j); - return reuse->socks[i]; + return first_valid_sk; } /** diff --git a/net/core/stream.c b/net/core/stream.c index 1105057ce00a..75fded8495f5 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -123,7 +123,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk_stream_memory_free(sk)) - current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; + current_timeo = vm_wait = prandom_u32_max(HZ / 5) + 2; add_wait_queue(sk_sleep(sk), &wait); diff --git a/net/core/utils.c b/net/core/utils.c index 938495bc1d34..c994e95172ac 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -302,7 +302,7 @@ static int inet4_pton(const char *src, u16 port_num, struct sockaddr_storage *addr) { struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; - int srclen = strlen(src); + size_t srclen = strlen(src); if (srclen > INET_ADDRSTRLEN) return -EINVAL; @@ -322,7 +322,7 @@ static int inet6_pton(struct net *net, const char *src, u16 port_num, { struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; const char *scope_delim; - int srclen = strlen(src); + size_t srclen = strlen(src); if (srclen > INET6_ADDRSTRLEN) return -EINVAL; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 7dfc00c9fb32..9ddc3a9e89e4 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -278,6 +278,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned int len); +void dccp_destruct_common(struct sock *sk); int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); void dccp_destroy_sock(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6a6e121dc00c..713b7b8dad7e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -144,7 +144,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, inet->inet_dport); - inet->inet_id = prandom_u32(); + inet->inet_id = get_random_u16(); err = dccp_connect(sk); rt = NULL; @@ -443,7 +443,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->inet_id = prandom_u32(); + newinet->inet_id = get_random_u16(); if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) goto put_and_exit; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e57b43006074..ae62b1591dea 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1021,6 +1021,12 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .sockaddr_len = sizeof(struct sockaddr_in6), }; +static void dccp_v6_sk_destruct(struct sock *sk) +{ + dccp_destruct_common(sk); + inet6_sock_destruct(sk); +} + /* NOTE: A lot of things set to zero 
explicitly by call to * sk_alloc() so need not be done here. */ @@ -1033,17 +1039,12 @@ static int dccp_v6_init_sock(struct sock *sk) if (unlikely(!dccp_v6_ctl_sock_initialized)) dccp_v6_ctl_sock_initialized = 1; inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; + sk->sk_destruct = dccp_v6_sk_destruct; } return err; } -static void dccp_v6_destroy_sock(struct sock *sk) -{ - dccp_destroy_sock(sk); - inet6_destroy_sock(sk); -} - static struct timewait_sock_ops dccp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct dccp6_timewait_sock), }; @@ -1066,7 +1067,7 @@ static struct proto dccp_v6_prot = { .accept = inet_csk_accept, .get_port = inet_csk_get_port, .shutdown = dccp_shutdown, - .destroy = dccp_v6_destroy_sock, + .destroy = dccp_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), diff --git a/net/dccp/proto.c b/net/dccp/proto.c index c548ca3e9b0e..9494b0d224f9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -171,12 +171,18 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); -static void dccp_sk_destruct(struct sock *sk) +void dccp_destruct_common(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_tx_ccid = NULL; +} +EXPORT_SYMBOL_GPL(dccp_destruct_common); + +static void dccp_sk_destruct(struct sock *sk) +{ + dccp_destruct_common(sk); inet_sock_destruct(sk); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1a59918d3b30..83e419afa89e 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -976,12 +976,12 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, s = per_cpu_ptr(dev->tstats, i); do { - start = u64_stats_fetch_begin_irq(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); tx_packets = u64_stats_read(&s->tx_packets); tx_bytes = u64_stats_read(&s->tx_bytes); rx_packets = u64_stats_read(&s->rx_packets); rx_bytes = u64_stats_read(&s->rx_bytes); - } while (u64_stats_fetch_retry_irq(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); data[0] += tx_packets; data[1] += tx_bytes; data[2] += rx_packets; @@ -3145,7 +3145,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, case NETDEV_CHANGELOWERSTATE: { struct netdev_notifier_changelowerstate_info *info = ptr; struct dsa_port *dp; - int err; + int err = 0; if (dsa_slave_dev_check(dev)) { dp = dsa_slave_to_port(dev); diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 566adf85e658..ee3e02da0013 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -202,6 +202,12 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(100, FX, Half), __DEFINE_LINK_MODE_NAME(100, FX, Full), __DEFINE_LINK_MODE_NAME(10, T1L, Full), + __DEFINE_LINK_MODE_NAME(800000, CR8, Full), + __DEFINE_LINK_MODE_NAME(800000, KR8, Full), + __DEFINE_LINK_MODE_NAME(800000, DR8, Full), + __DEFINE_LINK_MODE_NAME(800000, DR8_2, Full), + __DEFINE_LINK_MODE_NAME(800000, SR8, Full), + __DEFINE_LINK_MODE_NAME(800000, VR8, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -238,6 +244,8 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_X 1 #define __LINK_MODE_LANES_FX 1 #define __LINK_MODE_LANES_T1L 1 +#define __LINK_MODE_LANES_VR8 8 +#define __LINK_MODE_LANES_DR8_2 8 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ @@ -352,6 +360,12 @@ const struct link_mode_info 
link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(100, FX, Half), __DEFINE_LINK_MODE_PARAMS(100, FX, Full), __DEFINE_LINK_MODE_PARAMS(10, T1L, Full), + __DEFINE_LINK_MODE_PARAMS(800000, CR8, Full), + __DEFINE_LINK_MODE_PARAMS(800000, KR8, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR8, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR8_2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, SR8, Full), + __DEFINE_LINK_MODE_PARAMS(800000, VR8, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 1c94bb8ea03f..49c0a2a77f02 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -124,7 +124,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base, if (ret) goto err_free; - ret = get_module_eeprom_by_page(dev, &page_data, info->extack); + ret = get_module_eeprom_by_page(dev, &page_data, info ? info->extack : NULL); if (ret < 0) goto err_ops; diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 5a471e115b66..e8683e485dc9 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; - ret = pse_get_pse_attributes(dev, info->extack, data); + ret = pse_get_pse_attributes(dev, info ? info->extack : NULL, data); ethnl_ops_complete(dev); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 5bf357734b11..a50429a62f74 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -150,15 +150,15 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { if (!frame->skb_std) { - if (frame->skb_hsr) { + if (frame->skb_hsr) frame->skb_std = create_stripped_skb_hsr(frame->skb_hsr, frame); - } else { - /* Unexpected */ - WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", - __FILE__, __LINE__, port->dev->name); + else + netdev_warn_once(port->dev, + "Unexpected frame received in hsr_get_untagged_frame()\n"); + + if (!frame->skb_std) return NULL; - } } return skb_clone(frame->skb_std, GFP_ATOMIC); diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index de259b5170ab..57546e07e06a 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -129,6 +129,9 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) wpan_phy_net_set(&rdev->wpan_phy, &init_net); init_waitqueue_head(&rdev->dev_wait); + init_waitqueue_head(&rdev->wpan_phy.sync_txq); + + spin_lock_init(&rdev->wpan_phy.queue_lock); return &rdev->wpan_phy; } diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 6e55fae4c686..1fa2fe041ec0 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -502,8 +502,10 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) if (err < 0) goto out; - if (addr->family != AF_IEEE802154) + if (addr->family != AF_IEEE802154) { + err = -EINVAL; goto out; + } ieee802154_addr_from_sa(&haddr, &addr->addr); dev = ieee802154_get_dev(sock_net(sk), &haddr); diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index bbdd9c44f14e..af7d2cf490fb 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,7 +10,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ tcp_rate.o tcp_recovery.o tcp_ulp.o \ - tcp_offload.o datagram.o raw.o udp.o udplite.o \ + tcp_offload.o tcp_plb.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ 
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3dd02396517d..585f13b6fef6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1706,9 +1706,9 @@ u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt, bhptr = per_cpu_ptr(mib, cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); v = *(((u64 *)bhptr) + offt); - } while (u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); return v; } diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 405a8c2aea64..4d1af0cd7d99 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -70,10 +70,10 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len } inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; - reuseport_has_conns(sk, true); + reuseport_has_conns_set(sk); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); - inet->inet_id = prandom_u32(); + inet->inet_id = get_random_u16(); sk_dst_set(sk, &rt->dst); err = 0; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 943edf4ad4db..f361d3d56be2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -389,7 +389,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, dev_match = dev_match || (res.type == RTN_LOCAL && dev == net->loopback_dev); if (dev_match) { - ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; return ret; } if (no_addr) @@ -401,7 +401,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) - ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; } return ret; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e9a7f70a54df..f721c308248b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1231,7 +1231,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); - nh->fib_nh_scope = RT_SCOPE_LINK; + nh->fib_nh_scope = RT_SCOPE_HOST; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index df0660d818ac..81be3e0f0e70 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -213,7 +213,7 @@ static void igmp_stop_timer(struct ip_mc_list *im) /* It must be called with locked im->lock */ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) { - int tv = prandom_u32() % max_delay; + int tv = prandom_u32_max(max_delay); im->tm_running = 1; if (!mod_timer(&im->timer, jiffies+tv+2)) @@ -222,7 +222,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { - int tv = prandom_u32() % in_dev->mr_maxdelay; + int tv = prandom_u32_max(in_dev->mr_maxdelay); unsigned long exp = jiffies + tv + 2; if (in_dev->mr_gq_running && @@ -236,7 +236,7 @@ static void igmp_gq_start_timer(struct in_device *in_dev) static void igmp_ifc_start_timer(struct in_device *in_dev, int delay) { - int tv = prandom_u32() % delay; + int tv = prandom_u32_max(delay); if (!mod_timer(&in_dev->mr_ifc_timer, 
jiffies+tv+2)) in_dev_hold(in_dev); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ebca860e113f..4e84ed21d16f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -314,7 +314,7 @@ other_half_scan: if (likely(remaining > 1)) remaining &= ~1U; - offset = prandom_u32() % remaining; + offset = prandom_u32_max(remaining); /* __inet_hash_connect() favors ports having @low parity * We do the opposite to not pollute connect() users. */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9f9ac5013a7..7072fc0783ef 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -133,6 +133,7 @@ static void inet_frags_free_cb(void *ptr, void *arg) count = del_timer_sync(&fq->timer) ? 1 : 0; spin_lock_bh(&fq->lock); + fq->flags |= INET_FRAG_DROP; if (!(fq->flags & INET_FRAG_COMPLETE)) { fq->flags |= INET_FRAG_COMPLETE; count++; @@ -260,7 +261,8 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) kmem_cache_free(f->frags_cachep, q); } -unsigned int inet_frag_rbtree_purge(struct rb_root *root) +unsigned int inet_frag_rbtree_purge(struct rb_root *root, + enum skb_drop_reason reason) { struct rb_node *p = rb_first(root); unsigned int sum = 0; @@ -274,7 +276,7 @@ unsigned int inet_frag_rbtree_purge(struct rb_root *root) struct sk_buff *next = FRAG_CB(skb)->next_frag; sum += skb->truesize; - kfree_skb(skb); + kfree_skb_reason(skb, reason); skb = next; } } @@ -284,17 +286,21 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge); void inet_frag_destroy(struct inet_frag_queue *q) { - struct fqdir *fqdir; unsigned int sum, sum_truesize = 0; + enum skb_drop_reason reason; struct inet_frags *f; + struct fqdir *fqdir; WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); + reason = (q->flags & INET_FRAG_DROP) ? + SKB_DROP_REASON_FRAG_REASM_TIMEOUT : + SKB_CONSUMED; WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ fqdir = q->fqdir; f = fqdir->f; - sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); + sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments, reason); sum = sum_truesize + f->qsize; call_rcu(&q->rcu, inet_frag_destroy_rcu); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a0ad34e4f044..d3dc28156622 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1037,7 +1037,7 @@ ok: * on low contention the randomness is maximal and on high contention * it may be inexistent. 
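Several hunks above replace open-coded "prandom_u32() % bound" with prandom_u32_max(). A minimal standalone sketch of the multiply-shift range reduction that helper performs (the function name here is illustrative, not a kernel API):

#include <stdint.h>

/* Map a full-range 32-bit random value r into [0, bound) with a
 * multiply and a shift instead of a division: (r * bound) >> 32
 * scales r down proportionally. This mirrors prandom_u32_max(); the
 * residual bias when bound does not divide 2^32 is harmless for the
 * timer-fuzzing call sites converted in this series.
 */
static uint32_t bounded_random(uint32_t r, uint32_t bound)
{
	return (uint32_t)(((uint64_t)r * bound) >> 32);
}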
*/ - i = max_t(int, i, (prandom_u32() & 7) * 2); + i = max_t(int, i, prandom_u32_max(8) * 2); WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fb153569889e..69c00ffdcf3e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -153,6 +153,7 @@ static void ip_expire(struct timer_list *t) if (qp->q.flags & INET_FRAG_COMPLETE) goto out; + qp->q.flags |= INET_FRAG_DROP; ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); @@ -194,7 +195,7 @@ out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); - kfree_skb(head); + kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); ipq_put(qp); } @@ -254,7 +255,8 @@ static int ip_frag_reinit(struct ipq *qp) return -ETIMEDOUT; } - sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); + sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments, + SKB_DROP_REASON_FRAG_TOO_FAR); sub_frag_mem_limit(qp->q.fqdir, sum_truesize); qp->q.flags = 0; @@ -278,10 +280,14 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) struct net_device *dev; unsigned int fragsize; int err = -ENOENT; + SKB_DR(reason); u8 ecn; - if (qp->q.flags & INET_FRAG_COMPLETE) + /* If reassembly is already done, @skb must be a duplicate frag. */ + if (qp->q.flags & INET_FRAG_COMPLETE) { + SKB_DR_SET(reason, DUP_FRAG); goto err; + } if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && unlikely(ip_frag_too_far(qp)) && @@ -382,8 +388,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) insert_error: if (err == IPFRAG_DUP) { - kfree_skb(skb); - return -EINVAL; + SKB_DR_SET(reason, DUP_FRAG); + err = -EINVAL; + goto err; } err = -EINVAL; __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); @@ -391,7 +398,7 @@ discard_qp: inet_frag_kill(&qp->q); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); err: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return err; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f866d6282b2b..d8ee5238c395 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1665,7 +1665,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, if (err) goto out; - err = rtnl_configure_link(dev, NULL); + err = rtnl_configure_link(dev, NULL, 0, NULL); if (err < 0) goto out; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1ae83ad629b2..922c87ef1ab5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -172,7 +172,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, * Avoid using the hashed IP ident generator. 
*/ if (sk->sk_protocol == IPPROTO_TCP) - iph->id = (__force __be16)prandom_u32(); + iph->id = (__force __be16)get_random_u16(); else __ip_select_ident(net, iph, 1); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6e19cad154f5..5f16807d3235 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -267,7 +267,7 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, } #endif if (cmsg->cmsg_level == SOL_SOCKET) { - err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc); + err = __sock_cmsg_send(sk, cmsg, &ipc->sockc); if (err) return err; continue; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index ff85db52b2e5..ded5bef02f77 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -78,6 +78,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; flow.flowi4_scope = RT_SCOPE_UNIVERSE; flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); + flow.flowi4_uid = sock_net_uid(xt_net(par), NULL); return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index e886147eed11..fc65d69f23e1 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -65,6 +65,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, struct flowi4 fl4 = { .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), }; const struct net_device *oif; const struct net_device *found; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 853a75a8fbaf..d8ef05347fd9 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2534,7 +2534,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, if (!err) { nh->nh_flags = fib_nh->fib_nh_flags; fib_info_update_nhc_saddr(net, &fib_nh->nh_common, - fib_nh->fib_nh_scope); + !fib_nh->fib_nh_scope ? 
0 : fib_nh->fib_nh_scope - 1); } else { fib_nh_release(net, fib_nh); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 5386f460bd20..f88daace9de3 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -297,6 +297,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), + SNMP_MIB_ITEM("TCPPLBRehash", LINUX_MIB_TCPPLBREHASH), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 795cbe1de912..cd1fa9f70f1a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3664,7 +3664,7 @@ static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->ipv4.rt_genid, 0); atomic_set(&net->fnhe_genid, 0); - atomic_set(&net->ipv4.dev_addr_genid, get_random_int()); + atomic_set(&net->ipv4.dev_addr_genid, get_random_u32()); return 0; } @@ -3719,7 +3719,7 @@ int __init ip_rt_init(void) ip_idents = idents_hash; - prandom_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents)); + get_random_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents)); ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9b8a6db7a66b..0af28cedd071 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -40,6 +40,8 @@ static int one_day_secs = 24 * 3600; static u32 fib_multipath_hash_fields_all_mask __maybe_unused = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024; +static int tcp_plb_max_rounds = 31; +static int tcp_plb_max_cong_thresh = 256; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1384,6 +1386,47 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, + { + .procname = "tcp_plb_enabled", + .data = &init_net.ipv4.sysctl_tcp_plb_enabled, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "tcp_plb_idle_rehash_rounds", + .data = &init_net.ipv4.sysctl_tcp_plb_idle_rehash_rounds, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra2 = &tcp_plb_max_rounds, + }, + { + .procname = "tcp_plb_rehash_rounds", + .data = &init_net.ipv4.sysctl_tcp_plb_rehash_rounds, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra2 = &tcp_plb_max_rounds, + }, + { + .procname = "tcp_plb_suspend_rto_sec", + .data = &init_net.ipv4.sysctl_tcp_plb_suspend_rto_sec, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + }, + { + .procname = "tcp_plb_cong_thresh", + .data = &init_net.ipv4.sysctl_tcp_plb_cong_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &tcp_plb_max_cong_thresh, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f8232811a5be..de8f0cd7cb32 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -457,6 +457,7 @@ void tcp_init_sock(struct sock *sk) WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1])); WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1])); + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); sk_sockets_allocated_inc(sk); } EXPORT_SYMBOL(tcp_init_sock); @@ -3175,6 +3176,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->sacked_out = 
0; tp->tlp_high_seq = 0; tp->last_oow_ack_time = 0; + tp->plb_rehash = 0; /* There's a bubble in the pipe until at least the first ACK. */ tp->app_limited = ~0U; tp->rack.mstamp = 0; @@ -3938,6 +3940,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_reord_seen = tp->reord_seen; info->tcpi_rcv_ooopack = tp->rcv_ooopack; info->tcpi_snd_wnd = tp->snd_wnd; + info->tcpi_rcv_wnd = tp->rcv_wnd; + info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash; info->tcpi_fastopen_client_fail = tp->fastopen_client_fail; unlock_sock_fast(sk, slow); } @@ -3972,6 +3976,7 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */ nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */ + nla_total_size(sizeof(u32)) + /* TCP_NLA_REHASH */ 0; } @@ -4048,6 +4053,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u8(stats, TCP_NLA_TTL, tcp_skb_ttl_or_hop_limit(ack_skb)); + nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash); return stats; } diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 112f28f93693..ba4d98e510e0 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -243,7 +243,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad) struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - if (prandom_u32() <= nexp_u32(grad * backoff_factor)) + if (get_random_u32() <= nexp_u32(grad * backoff_factor)) return false; if (use_ineff) { diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 2a6c0dd665a4..e0a2ca7456ff 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -54,6 +54,7 @@ struct dctcp { u32 next_seq; u32 ce_state; u32 loss_cwnd; + struct tcp_plb_state plb; }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ @@ -91,6 +92,8 @@ static void dctcp_init(struct sock *sk) ca->ce_state = 0; dctcp_reset(tp, ca); + tcp_plb_init(sk, &ca->plb); + return; } @@ -117,14 +120,28 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { + u32 delivered = tp->delivered - ca->old_delivered; u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; + u32 ce_ratio = 0; + + if (delivered > 0) { + /* dctcp_alpha keeps EWMA of fraction of ECN marked + * packets. Because of EWMA smoothing, PLB reaction can + * be slow so we use ce_ratio which is an instantaneous + * measure of congestion. ce_ratio is the fraction of + * ECN marked packets in the previous RTT. + */ + if (delivered_ce > 0) + ce_ratio = (delivered_ce << TCP_PLB_SCALE) / delivered; + tcp_plb_update_state(sk, &ca->plb, (int)ce_ratio); + tcp_plb_check_rehash(sk, &ca->plb); + } /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); if (delivered_ce) { - u32 delivered = tp->delivered - ca->old_delivered; /* If dctcp_shift_g == 1, a 32bit value would overflow * after 8 M packets. @@ -172,8 +189,12 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); break; case CA_EVENT_LOSS: + tcp_plb_update_state_upon_rto(sk, &ca->plb); dctcp_react_to_loss(sk); break; + case CA_EVENT_TX_START: + tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ + break; default: /* Don't care for the rest. 
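To make the fixed-point arithmetic in dctcp_update_alpha() above concrete, here is a small worked computation. It assumes TCP_PLB_SCALE == 8, i.e. ratios expressed in 1/256 units, so a 50% congestion threshold corresponds to the value 128:

#include <assert.h>
#include <stdint.h>

#define PLB_SCALE 8 /* stand-in for TCP_PLB_SCALE */

/* ce_ratio = fraction of ECN-marked packets, in 1/256 units */
static uint32_t ce_ratio(uint32_t delivered_ce, uint32_t delivered)
{
	return (delivered_ce << PLB_SCALE) / delivered;
}

int main(void)
{
	/* 30 of 100 packets marked: 76/256 ~ 29.7%, below the 128
	 * (50%) threshold, so the round is not counted as congested. */
	assert(ce_ratio(30, 100) == 76);
	/* 60 of 100 marked: 153/256 ~ 59.8%, a congested round. */
	assert(ce_ratio(60, 100) == 153);
	return 0;
}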
*/ break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bc2ea12221f9..d764b5854dfc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2192,7 +2192,8 @@ void tcp_enter_loss(struct sock *sk) */ static bool tcp_check_sack_reneging(struct sock *sk, int flag) { - if (flag & FLAG_SACK_RENEGING) { + if (flag & FLAG_SACK_RENEGING && + flag & FLAG_SND_UNA_ADVANCED) { struct tcp_sock *tp = tcp_sk(sk); unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), msecs_to_jiffies(10)); @@ -4763,8 +4764,8 @@ static void tcp_ofo_queue(struct sock *sk) } } -static bool tcp_prune_ofo_queue(struct sock *sk); -static int tcp_prune_queue(struct sock *sk); +static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb); +static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb); static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size) @@ -4772,11 +4773,11 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, size)) { - if (tcp_prune_queue(sk) < 0) + if (tcp_prune_queue(sk, skb) < 0) return -1; while (!sk_rmem_schedule(sk, skb, size)) { - if (!tcp_prune_ofo_queue(sk)) + if (!tcp_prune_ofo_queue(sk, skb)) return -1; } } @@ -5328,6 +5329,8 @@ new_range: * Clean the out-of-order queue to make room. * We drop high sequences packets to : * 1) Let a chance for holes to be filled. + * This means we do not drop packets from ooo queue if their sequence + * is before incoming packet sequence. * 2) not add too big latencies if thousands of packets sit there. * (But if application shrinks SO_RCVBUF, we could still end up * freeing whole queue here) @@ -5335,24 +5338,31 @@ new_range: * * Return true if queue has shrunk. */ -static bool tcp_prune_ofo_queue(struct sock *sk) +static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb) { struct tcp_sock *tp = tcp_sk(sk); struct rb_node *node, *prev; + bool pruned = false; int goal; if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false; - NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); goal = sk->sk_rcvbuf >> 3; node = &tp->ooo_last_skb->rbnode; + do { + struct sk_buff *skb = rb_to_skb(node); + + /* If incoming skb would land last in ofo queue, stop pruning. */ + if (after(TCP_SKB_CB(in_skb)->seq, TCP_SKB_CB(skb)->seq)) + break; + pruned = true; prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); - goal -= rb_to_skb(node)->truesize; - tcp_drop_reason(sk, rb_to_skb(node), - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); + goal -= skb->truesize; + tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); + tp->ooo_last_skb = rb_to_skb(prev); if (!prev || goal <= 0) { if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) @@ -5361,16 +5371,18 @@ static bool tcp_prune_ofo_queue(struct sock *sk) } node = prev; } while (node); - tp->ooo_last_skb = rb_to_skb(prev); - /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. - */ - if (tp->rx_opt.sack_ok) - tcp_sack_reset(&tp->rx_opt); - return true; + if (pruned) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. 
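The new stop condition in tcp_prune_ofo_queue() above only frees queued segments that would sort after the incoming skb. A worked trace with illustrative sequence numbers:

/* ooo queue holds segments with seq {100, 200, 300}; incoming seq = 250.
 * The walk starts at the highest node, 300: after(250, 300) is false,
 * so 300 is dropped. The next node is 200: after(250, 200) is true,
 * so pruning stops. Only the segment sorting after the incoming one
 * was freed, preserving the hole-filling chances of 100 and 200.
 */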
When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if (tp->rx_opt.sack_ok) + tcp_sack_reset(&tp->rx_opt); + } + return pruned; } /* Reduce allocated memory if we can, trying to get @@ -5380,7 +5392,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) * until the socket owning process reads some of the data * to stabilize the situation. */ -static int tcp_prune_queue(struct sock *sk) +static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -5407,7 +5419,7 @@ static int tcp_prune_queue(struct sock *sk) /* Collapsing did not help, destructive actions follow. * This must not ever occur. */ - tcp_prune_ofo_queue(sk); + tcp_prune_ofo_queue(sk, in_skb); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6376ad915765..ebab9e8b184c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -323,7 +323,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr); } - inet->inet_id = prandom_u32(); + inet->inet_id = get_random_u16(); if (tcp_fastopen_defer_connect(sk, &err)) return err; @@ -1543,7 +1543,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; - newinet->inet_id = prandom_u32(); + newinet->inet_id = get_random_u16(); /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). @@ -1874,11 +1874,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, __skb_push(skb, hdrlen); no_coalesce: + limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1); + /* Only socket owner can try to collapse/prune rx queues * to reduce memory overhead, so add a little headroom here. * Few sockets backlog are possibly concurrently non empty. */ - limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf) + 64*1024; + limit += 64 * 1024; if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); @@ -3216,6 +3218,14 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); + /* Set default values for PLB */ + net->ipv4.sysctl_tcp_plb_enabled = 0; /* Disabled by default */ + net->ipv4.sysctl_tcp_plb_idle_rehash_rounds = 3; + net->ipv4.sysctl_tcp_plb_rehash_rounds = 12; + net->ipv4.sysctl_tcp_plb_suspend_rto_sec = 60; + /* Default congestion threshold for PLB to mark a round is 50% */ + net->ipv4.sysctl_tcp_plb_cong_thresh = (1 << TCP_PLB_SCALE) / 2; + /* Reno is always built in */ if (!net_eq(net, &init_net) && bpf_try_module_get(init_net.ipv4.tcp_congestion_control, diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c new file mode 100644 index 000000000000..bb1a08fda113 --- /dev/null +++ b/net/ipv4/tcp_plb.c @@ -0,0 +1,109 @@ +/* Protective Load Balancing (PLB) + * + * PLB was designed to reduce link load imbalance across datacenter + * switches. PLB is a host-based optimization; it leverages congestion + * signals from the transport layer to randomly change the path of the + * connection experiencing sustained congestion. PLB prefers to repath + * after idle periods to minimize packet reordering. 
It repaths by + * changing the IPv6 Flow Label on the packets of a connection, which + * datacenter switches include as part of ECMP/WCMP hashing. + * + * PLB is described in detail in: + * + * Mubashir Adnan Qureshi, Yuchung Cheng, Qianwen Yin, Qiaobin Fu, + * Gautam Kumar, Masoud Moshref, Junhua Yan, Van Jacobson, + * David Wetherall, Abdul Kabbani: + * "PLB: Congestion Signals are Simple and Effective for + * Network Load Balancing" + * In ACM SIGCOMM 2022, Amsterdam, Netherlands. + * + */ + +#include <net/tcp.h> + +/* Called once per round-trip to update PLB state for a connection. */ +void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb, + const int cong_ratio) +{ + struct net *net = sock_net(sk); + + if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) + return; + + if (cong_ratio >= 0) { + if (cong_ratio < READ_ONCE(net->ipv4.sysctl_tcp_plb_cong_thresh)) + plb->consec_cong_rounds = 0; + else if (plb->consec_cong_rounds < + READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds)) + plb->consec_cong_rounds++; + } +} +EXPORT_SYMBOL_GPL(tcp_plb_update_state); + +/* Check whether recent congestion has been persistent enough to warrant + * a load balancing decision that switches the connection to another path. + */ +void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb) +{ + struct net *net = sock_net(sk); + u32 max_suspend; + bool forced_rehash = false, idle_rehash = false; + + if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) + return; + + forced_rehash = plb->consec_cong_rounds >= + READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds); + /* If sender goes idle then we check whether to rehash. */ + idle_rehash = READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds) && + !tcp_sk(sk)->packets_out && + plb->consec_cong_rounds >= + READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds); + + if (!forced_rehash && !idle_rehash) + return; + + /* Note that tcp_jiffies32 can wrap; we detect wraps by checking for + * cases where the max suspension end is before the actual suspension + * end. We clear pause_until to 0 to indicate there is no recent + * RTO event that constrains PLB rehashing. + */ + max_suspend = 2 * READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ; + if (plb->pause_until && + (!before(tcp_jiffies32, plb->pause_until) || + before(tcp_jiffies32 + max_suspend, plb->pause_until))) + plb->pause_until = 0; + + if (plb->pause_until) + return; + + sk_rethink_txhash(sk); + plb->consec_cong_rounds = 0; + tcp_sk(sk)->plb_rehash++; + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH); +} +EXPORT_SYMBOL_GPL(tcp_plb_check_rehash); + +/* Upon RTO, disallow load balancing for a while, to avoid having load + * balancing decisions switch traffic to a black-holed path that was + * previously avoided with a sk_rethink_txhash() call at RTO time. + */ +void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb) +{ + struct net *net = sock_net(sk); + u32 pause; + + if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) + return; + + pause = READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ; + pause += prandom_u32_max(pause); + plb->pause_until = tcp_jiffies32 + pause; + + /* Reset PLB state upon RTO, since an RTO causes a sk_rethink_txhash() call + * that may switch this connection to a path with completely different + * congestion characteristics.
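The wrap detection above leans on the kernel's before() idiom for free-running 32-bit clocks. A standalone sketch of that comparison (illustrative, not the kernel implementation):

#include <stdint.h>

/* "a is strictly before b": the signed difference stays correct
 * across the 2^32 wrap as long as the two timestamps are within
 * 2^31 ticks of each other. If pause_until lies further ahead than
 * twice the RTO-suspend budget permits, the code above concludes
 * tcp_jiffies32 wrapped and clears the pause.
 */
static inline int before32(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}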
+ */ + plb->consec_cong_rounds = 0; +} +EXPORT_SYMBOL_GPL(tcp_plb_update_state_upon_rto); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8126f67d18b3..89accc3c8bb3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -246,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - rand = prandom_u32(); + rand = get_random_u32(); first = reciprocal_scale(rand, remaining) + low; /* * force rand to be an odd multiple of UDP_HTABLE_SIZE @@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); /* Fall back to scoring if group has connections */ - if (result && !reuseport_has_conns(sk, false)) + if (result && !reuseport_has_conns(sk)) return result; result = result ? : sk; @@ -1448,7 +1448,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; - if (size < (sk->sk_rcvbuf >> 2) && + if (size < READ_ONCE(up->forward_threshold) && !skb_queue_empty(&up->reader_queue)) return; } else { @@ -1622,8 +1622,9 @@ static void udp_destruct_sock(struct sock *sk) int udp_init_sock(struct sock *sk) { - skb_queue_head_init(&udp_sk(sk)->reader_queue); + udp_lib_init_sock(sk); sk->sk_destruct = udp_destruct_sock; + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); return 0; } @@ -2671,6 +2672,18 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, int err = 0; int is_udplite = IS_UDPLITE(sk); + if (level == SOL_SOCKET) { + err = sk_setsockopt(sk, level, optname, optval, optlen); + + if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) { + sockopt_lock_sock(sk); + /* paired with READ_ONCE in udp_rmem_release() */ + WRITE_ONCE(up->forward_threshold, sk->sk_rcvbuf >> 2); + sockopt_release_sock(sk); + } + return err; + } + if (optlen < sizeof(int)) return -EINVAL; @@ -2784,7 +2797,7 @@ EXPORT_SYMBOL(udp_lib_setsockopt); int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_push_pending_frames); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 10ce86bf228e..9c3f5202a97b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -104,7 +104,7 @@ static inline u32 cstamp_delta(unsigned long cstamp) static inline s32 rfc3315_s14_backoff_init(s32 irt) { /* multiply 'initial retransmission time' by 0.9 .. 1.1 */ - u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt; + u64 tmp = (900000 + prandom_u32_max(200001)) * (u64)irt; do_div(tmp, 1000000); return (s32)tmp; } @@ -112,11 +112,11 @@ static inline s32 rfc3315_s14_backoff_init(s32 irt) static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt) { /* multiply 'retransmission timeout' by 1.9 .. 2.1 */ - u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt; + u64 tmp = (1900000 + prandom_u32_max(200001)) * (u64)rt; do_div(tmp, 1000000); if ((s32)tmp > mrt) { /* multiply 'maximum retransmission time' by 0.9 .. 
1.1 */ - tmp = (900000 + prandom_u32() % 200001) * (u64)mrt; + tmp = (900000 + prandom_u32_max(200001)) * (u64)mrt; do_div(tmp, 1000000); } return (s32)tmp; @@ -3967,7 +3967,7 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else - rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); + rand_num = prandom_u32_max(idev->cnf.rtr_solicit_delay ?: 1); nonce = 0; if (idev->cnf.enhanced_dad || @@ -7214,9 +7214,11 @@ err_reg_dflt: __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: kfree(dflt); + net->ipv6.devconf_dflt = NULL; #endif err_alloc_dflt: kfree(all); + net->ipv6.devconf_all = NULL; err_alloc_all: kfree(net->ipv6.inet6_addr_lst); err_alloc_addr: diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 024191004982..68075295d587 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -114,6 +114,7 @@ void inet6_sock_destruct(struct sock *sk) inet6_cleanup_sock(sk); inet_sock_destruct(sk); } +EXPORT_SYMBOL_GPL(inet6_sock_destruct); static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) @@ -489,7 +490,7 @@ int inet6_release(struct socket *sock) } EXPORT_SYMBOL(inet6_release); -void inet6_destroy_sock(struct sock *sk) +void inet6_cleanup_sock(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -514,12 +515,6 @@ void inet6_destroy_sock(struct sock *sk) txopt_put(opt); } } -EXPORT_SYMBOL_GPL(inet6_destroy_sock); - -void inet6_cleanup_sock(struct sock *sk) -{ - inet6_destroy_sock(sk); -} EXPORT_SYMBOL_GPL(inet6_cleanup_sock); /* diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index df665d4e8f0f..df7e032ce87d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -256,7 +256,7 @@ ipv4_connected: goto out; } - reuseport_has_conns(sk, true); + reuseport_has_conns_set(sk); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: @@ -771,7 +771,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, } if (cmsg->cmsg_level == SOL_SOCKET) { - err = __sock_cmsg_send(sk, msg, cmsg, &ipc6->sockc); + err = __sock_cmsg_send(sk, cmsg, &ipc6->sockc); if (err) return err; continue; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index ceb85c67ce39..18481eb76a0a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -220,7 +220,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { - fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK; + fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK; if (fl->label) { lfl = __fl_lookup(net, fl->label); if (!lfl) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 02b1b54165e8..7673e1dd1147 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1155,14 +1155,16 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, dev->needed_headroom = dst_len; if (set_mtu) { - dev->mtu = rt->dst.dev->mtu - t_hlen; + int mtu = rt->dst.dev->mtu - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; + mtu -= 8; if (dev->type == ARPHRD_ETHER) - dev->mtu -= ETH_HLEN; + mtu -= ETH_HLEN; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } ip6_rt_put(rt); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cc5d5e75b658..2fb4c6ad7243 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1450,8 +1450,8 @@ static void ip6_tnl_link_config(struct 
ip6_tnl *t) struct net_device *tdev = NULL; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; - unsigned int mtu; int t_hlen; + int mtu; __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); @@ -1498,12 +1498,13 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) dev->hard_header_len = tdev->hard_header_len + t_hlen; mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); - dev->mtu = mtu - t_hlen; + mtu = mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; + mtu -= 8; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 532f4478c884..9ce51680290b 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1005,10 +1005,8 @@ unlock: return retv; e_inval: - sockopt_release_sock(sk); - if (needs_rtnl) - rtnl_unlock(); - return -EINVAL; + retv = -EINVAL; + goto unlock; } int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 0566ab03ddbe..7860383295d8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1050,7 +1050,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, /* called with mc_lock */ static void mld_gq_start_work(struct inet6_dev *idev) { - unsigned long tv = prandom_u32() % idev->mc_maxdelay; + unsigned long tv = prandom_u32_max(idev->mc_maxdelay); idev->mc_gq_running = 1; if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2)) @@ -1068,7 +1068,7 @@ static void mld_gq_stop_work(struct inet6_dev *idev) /* called with mc_lock */ static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay) { - unsigned long tv = prandom_u32() % delay; + unsigned long tv = prandom_u32_max(delay); if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2)) in6_dev_hold(idev); @@ -1085,7 +1085,7 @@ static void mld_ifc_stop_work(struct inet6_dev *idev) /* called with mc_lock */ static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay) { - unsigned long tv = prandom_u32() % delay; + unsigned long tv = prandom_u32_max(delay); if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2)) in6_dev_hold(idev); @@ -1130,7 +1130,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) } if (delay >= resptime) - delay = prandom_u32() % resptime; + delay = prandom_u32_max(resptime); if (!mod_delayed_work(mld_wq, &ma->mca_work, delay)) refcount_inc(&ma->mca_refcnt); @@ -2574,7 +2574,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma) igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); - delay = prandom_u32() % unsolicited_report_interval(ma->idev); + delay = prandom_u32_max(unsolicited_report_interval(ma->idev)); if (cancel_delayed_work(&ma->mca_work)) { refcount_dec(&ma->mca_refcnt); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 69d86b040a6a..a01d9b842bd0 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -40,6 +40,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev), .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, + .flowi6_uid = sock_net_uid(net, NULL), .daddr = iph->saddr, }; int lookup_flags; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c 
b/net/ipv6/netfilter/nf_conntrack_reasm.c index 38db0064d661..d13240f13607 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -253,7 +253,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, if (err) { if (err == IPFRAG_DUP) { /* No error for duplicates, pretend they got queued. */ - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_DUP_FRAG); return -EINPROGRESS; } goto insert_error; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 91faac610e03..36dc14b34388 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -66,6 +66,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, + .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; u32 ret = 0; @@ -163,6 +164,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, + .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; struct rt6_info *rt; int lookup_flags; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 2880dc7d9a49..2685c3f15e9d 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -18,7 +18,7 @@ static u32 __ipv6_select_ident(struct net *net, u32 id; do { - id = prandom_u32(); + id = get_random_u32(); } while (!id); return id; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 86c26e48d065..808983bc2ec9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -23,11 +23,6 @@ #include <linux/bpf-cgroup.h> #include <net/ping.h> -static void ping_v6_destroy(struct sock *sk) -{ - inet6_destroy_sock(sk); -} - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -205,7 +200,6 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, - .destroy = ping_v6_destroy, .pre_connect = ping_v6_pre_connect, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 722de9dd0ff7..a06a9f847db5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1173,8 +1173,6 @@ static void raw6_destroy(struct sock *sk) lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - - inet6_destroy_sock(sk); } static int rawv6_init_sk(struct sock *sk) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ff866f2a879e..5bc8a28e67f9 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -112,10 +112,14 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev_tail; struct net_device *dev; int err = -ENOENT; + SKB_DR(reason); u8 ecn; - if (fq->q.flags & INET_FRAG_COMPLETE) + /* If reassembly is already done, @skb must be a duplicate frag. 
*/ + if (fq->q.flags & INET_FRAG_COMPLETE) { + SKB_DR_SET(reason, DUP_FRAG); goto err; + } err = -EINVAL; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -226,8 +230,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, insert_error: if (err == IPFRAG_DUP) { - kfree_skb(skb); - return -EINVAL; + SKB_DR_SET(reason, DUP_FRAG); + err = -EINVAL; + goto err; } err = -EINVAL; __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), @@ -237,7 +242,7 @@ discard_fq: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); err: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return err; } diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 8370726ae7bf..487f8e98deaa 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1644,13 +1644,13 @@ static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt) pcounters = per_cpu_ptr(slwt->pcpu_counters, i); do { - start = u64_stats_fetch_begin_irq(&pcounters->syncp); + start = u64_stats_fetch_begin(&pcounters->syncp); packets = u64_stats_read(&pcounters->packets); bytes = u64_stats_read(&pcounters->bytes); errors = u64_stats_read(&pcounters->errors); - } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start)); + } while (u64_stats_fetch_retry(&pcounters->syncp, start)); counters.packets += packets; counters.bytes += bytes; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d27683e3fc97..5703d3cbea9b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1124,10 +1124,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int mtu; - dev->mtu = tdev->mtu - t_hlen; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + mtu = tdev->mtu - t_hlen; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2a3f9296df1e..f676be14e6b6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1966,12 +1966,6 @@ static int tcp_v6_init_sock(struct sock *sk) return 0; } -static void tcp_v6_destroy_sock(struct sock *sk) -{ - tcp_v4_destroy_sock(sk); - inet6_destroy_sock(sk); -} - #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, @@ -2164,7 +2158,7 @@ struct proto tcpv6_prot = { .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, - .destroy = tcp_v6_destroy_sock, + .destroy = tcp_v4_destroy_sock, .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8d09f0ea5b8c..297f7cc06044 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -64,7 +64,7 @@ static void udpv6_destruct_sock(struct sock *sk) int udpv6_init_sock(struct sock *sk) { - skb_queue_head_init(&udp_sk(sk)->reader_queue); + udp_lib_init_sock(sk); sk->sk_destruct = udpv6_destruct_sock; return 0; } @@ -195,7 +195,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); /* Fall back to scoring if group has connections */ - if (result && !reuseport_has_conns(sk, false)) + if (result && !reuseport_has_conns(sk)) return result; result = result ? 
: sk; @@ -1661,8 +1661,6 @@ void udpv6_destroy_sock(struct sock *sk) udp_encap_disable(); } } - - inet6_destroy_sock(sk); } /* @@ -1671,7 +1669,7 @@ void udpv6_destroy_sock(struct sock *sk) int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 27725464ec08..a5004228111d 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -162,7 +162,8 @@ static void kcm_rcv_ready(struct kcm_sock *kcm) /* Buffer limit is okay now, add to ready list */ list_add_tail(&kcm->wait_rx_list, &kcm->mux->kcm_rx_waiters); - kcm->rx_wait = true; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, true); } static void kcm_rfree(struct sk_buff *skb) @@ -178,7 +179,7 @@ static void kcm_rfree(struct sk_buff *skb) /* For reading rx_wait and rx_psock without holding lock */ smp_mb__after_atomic(); - if (!kcm->rx_wait && !kcm->rx_psock && + if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) && sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { spin_lock_bh(&mux->rx_lock); kcm_rcv_ready(kcm); @@ -237,7 +238,8 @@ try_again: if (kcm_queue_rcv_skb(&kcm->sk, skb)) { /* Should mean socket buffer full */ list_del(&kcm->wait_rx_list); - kcm->rx_wait = false; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, false); /* Commit rx_wait to read in kcm_free */ smp_wmb(); @@ -280,10 +282,12 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, kcm = list_first_entry(&mux->kcm_rx_waiters, struct kcm_sock, wait_rx_list); list_del(&kcm->wait_rx_list); - kcm->rx_wait = false; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, false); psock->rx_kcm = kcm; - kcm->rx_psock = psock; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_psock, psock); spin_unlock_bh(&mux->rx_lock); @@ -310,7 +314,8 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, spin_lock_bh(&mux->rx_lock); psock->rx_kcm = NULL; - kcm->rx_psock = NULL; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_psock, NULL); /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with * kcm_rfree @@ -834,7 +839,7 @@ static ssize_t kcm_sendpage(struct socket *sock, struct page *page, } get_page(page); - skb_fill_page_desc(skb, i, page, offset, size); + skb_fill_page_desc_noacc(skb, i, page, offset, size); skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; coalesced: @@ -1240,7 +1245,8 @@ static void kcm_recv_disable(struct kcm_sock *kcm) if (!kcm->rx_psock) { if (kcm->rx_wait) { list_del(&kcm->wait_rx_list); - kcm->rx_wait = false; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, false); } requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); @@ -1793,7 +1799,8 @@ static void kcm_done(struct kcm_sock *kcm) if (kcm->rx_wait) { list_del(&kcm->wait_rx_list); - kcm->rx_wait = false; + /* paired with lockless reads in kcm_rfree() */ + WRITE_ONCE(kcm->rx_wait, false); } /* Move any pending receive messages to other kcm sockets */ requeue_rx_msgs(mux, &sk->sk_receive_queue); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 9dbd801ddb98..2478aa60145f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -257,8 +257,6 @@ static void l2tp_ip6_destroy_sock(struct sock *sk) if (tunnel) l2tp_tunnel_delete(tunnel); - 
- inet6_destroy_sock(sk); } static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 9414d3bbd65f..c6fa53230450 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -183,34 +183,15 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, const struct ieee80211_addba_ext_ie *req, u16 buf_size) { - struct ieee80211_supported_band *sband; struct ieee80211_addba_ext_ie *resp; - const struct ieee80211_sta_he_cap *he_cap; - u8 frag_level, cap_frag_level; u8 *pos; - sband = ieee80211_get_sband(sdata); - if (!sband) - return; - he_cap = ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); - if (!he_cap) - return; - pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie)); *pos++ = WLAN_EID_ADDBA_EXT; *pos++ = sizeof(struct ieee80211_addba_ext_ie); resp = (struct ieee80211_addba_ext_ie *)pos; resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG; - frag_level = u32_get_bits(req->data, - IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK); - cap_frag_level = u32_get_bits(he_cap->he_cap_elem.mac_cap_info[0], - IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK); - if (frag_level > cap_frag_level) - frag_level = cap_frag_level; - resp->data |= u8_encode_bits(frag_level, - IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK); resp->data |= u8_encode_bits(buf_size >> IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT, IEEE80211_ADDBA_EXT_BUF_SIZE_MASK); } @@ -242,7 +223,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, sdata->vif.type == NL80211_IFTYPE_MESH_POINT) memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) - memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN); @@ -297,9 +278,9 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, } if (!sta->sta.deflink.ht_cap.ht_supported && - sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) { + !sta->sta.deflink.he_cap.has_he) { ht_dbg(sta->sdata, - "STA %pM erroneously requests BA session on tid %d w/o QoS\n", + "STA %pM erroneously requests BA session on tid %d w/o HT\n", sta->sta.addr, tid); /* send a response anyway, it's an error case if we get here */ goto end; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 07c892aa8c73..9c40f8d3bce8 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -82,7 +82,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, sdata->vif.type == NL80211_IFTYPE_MESH_POINT) memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) - memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 687b4c878d4a..c848fe04dd44 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2554,47 +2554,50 @@ static int ieee80211_change_bss(struct wiphy *wiphy, struct bss_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_link_data *link; struct ieee80211_supported_band *sband; u32 changed = 0; - if (!sdata_dereference(sdata->deflink.u.ap.beacon, sdata)) + link = ieee80211_link_or_deflink(sdata, params->link_id, true); 
+ if (IS_ERR(link)) + return PTR_ERR(link); + + if (!sdata_dereference(link->u.ap.beacon, sdata)) return -ENOENT; - sband = ieee80211_get_sband(sdata); + sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->use_cts_prot >= 0) { - sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot; + link->conf->use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (params->use_short_preamble >= 0) { - sdata->vif.bss_conf.use_short_preamble = - params->use_short_preamble; + link->conf->use_short_preamble = params->use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } - if (!sdata->vif.bss_conf.use_short_slot && + if (!link->conf->use_short_slot && (sband->band == NL80211_BAND_5GHZ || sband->band == NL80211_BAND_6GHZ)) { - sdata->vif.bss_conf.use_short_slot = true; + link->conf->use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } if (params->use_short_slot_time >= 0) { - sdata->vif.bss_conf.use_short_slot = - params->use_short_slot_time; + link->conf->use_short_slot = params->use_short_slot_time; changed |= BSS_CHANGED_ERP_SLOT; } if (params->basic_rates) { - ieee80211_parse_bitrates(sdata->vif.bss_conf.chandef.width, + ieee80211_parse_bitrates(link->conf->chandef.width, wiphy->bands[sband->band], params->basic_rates, params->basic_rates_len, - &sdata->vif.bss_conf.basic_rates); + &link->conf->basic_rates); changed |= BSS_CHANGED_BASIC_RATES; - ieee80211_check_rate_mask(&sdata->deflink); + ieee80211_check_rate_mask(link); } if (params->ap_isolate >= 0) { @@ -2606,30 +2609,29 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (params->ht_opmode >= 0) { - sdata->vif.bss_conf.ht_operation_mode = - (u16) params->ht_opmode; + link->conf->ht_operation_mode = (u16)params->ht_opmode; changed |= BSS_CHANGED_HT; } if (params->p2p_ctwindow >= 0) { - sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= + link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; - sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + link->conf->p2p_noa_attr.oppps_ctwindow |= params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; changed |= BSS_CHANGED_P2P_PS; } if (params->p2p_opp_ps > 0) { - sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + link->conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } else if (params->p2p_opp_ps == 0) { - sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= + link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } - ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); + ieee80211_link_info_change_notify(sdata, link, changed); return 0; } @@ -4338,9 +4340,6 @@ static int ieee80211_get_txq_stats(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata; int ret = 0; - if (!local->ops->wake_tx_queue) - return 1; - spin_lock_bh(&local->fq.lock); rcu_read_lock(); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 78c7d60e8667..dfb9f55e2685 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -663,9 +663,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD_MODE(force_tx_status, 0600); DEBUGFS_ADD_MODE(aql_enable, 0600); DEBUGFS_ADD(aql_pending); - - if (local->ops->wake_tx_queue) - DEBUGFS_ADD_MODE(aqm, 0600); + DEBUGFS_ADD_MODE(aqm, 0600); DEBUGFS_ADD_MODE(airtime_flags, 0600); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 5b014786fd2d..c87e1137e5da 100644 --- a/net/mac80211/debugfs_netdev.c +++ 
b/net/mac80211/debugfs_netdev.c @@ -677,8 +677,7 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz); DEBUGFS_ADD(hw_queues); - if (sdata->local->ops->wake_tx_queue && - sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && sdata->vif.type != NL80211_IFTYPE_NAN) DEBUGFS_ADD(aqm); } diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index d3397c1248d3..7a3d7893e19d 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #include <linux/debugfs.h> @@ -435,8 +435,29 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu } STA_OPS_RW(agg_status); -static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +/* link sta attributes */ +#define LINK_STA_OPS(name) \ +static const struct file_operations link_sta_ ##name## _ops = { \ + .read = link_sta_##name##_read, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +} + +static ssize_t link_sta_addr_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct link_sta_info *link_sta = file->private_data; + u8 mac[3 * ETH_ALEN + 1]; + + snprintf(mac, sizeof(mac), "%pM\n", link_sta->pub->addr); + + return simple_read_from_buffer(userbuf, count, ppos, mac, 3 * ETH_ALEN); +} + +LINK_STA_OPS(addr); + +static ssize_t link_sta_ht_capa_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) { #define PRINT_HT_CAP(_cond, _str) \ do { \ @@ -446,8 +467,8 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, char *buf, *p; int i; ssize_t bufsz = 512; - struct sta_info *sta = file->private_data; - struct ieee80211_sta_ht_cap *htc = &sta->sta.deflink.ht_cap; + struct link_sta_info *link_sta = file->private_data; + struct ieee80211_sta_ht_cap *htc = &link_sta->pub->ht_cap; ssize_t ret; buf = kzalloc(bufsz, GFP_KERNEL); @@ -524,14 +545,14 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, kfree(buf); return ret; } -STA_OPS(ht_capa); +LINK_STA_OPS(ht_capa); -static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static ssize_t link_sta_vht_capa_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) { char *buf, *p; - struct sta_info *sta = file->private_data; - struct ieee80211_sta_vht_cap *vhtc = &sta->sta.deflink.vht_cap; + struct link_sta_info *link_sta = file->private_data; + struct ieee80211_sta_vht_cap *vhtc = &link_sta->pub->vht_cap; ssize_t ret; ssize_t bufsz = 512; @@ -638,15 +659,15 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, kfree(buf); return ret; } -STA_OPS(vht_capa); +LINK_STA_OPS(vht_capa); -static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static ssize_t link_sta_he_capa_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) { char *buf, *p; size_t buf_sz = PAGE_SIZE; - struct sta_info *sta = file->private_data; - struct ieee80211_sta_he_cap *hec = &sta->sta.deflink.he_cap; + struct link_sta_info *link_sta = file->private_data; + struct ieee80211_sta_he_cap *hec = &link_sta->pub->he_cap; 
struct ieee80211_he_mcs_nss_supp *nss = &hec->he_mcs_nss_supp; u8 ppe_size; u8 *cap; @@ -1011,7 +1032,7 @@ out: kfree(buf); return ret; } -STA_OPS(he_capa); +LINK_STA_OPS(he_capa); #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ @@ -1048,18 +1069,11 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(num_ps_buf_frames); DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); - DEBUGFS_ADD(ht_capa); - DEBUGFS_ADD(vht_capa); - DEBUGFS_ADD(he_capa); - - DEBUGFS_ADD_COUNTER(rx_duplicates, deflink.rx_stats.num_duplicates); - DEBUGFS_ADD_COUNTER(rx_fragments, deflink.rx_stats.fragments); + /* FIXME: Kept here as the statistics are only done on the deflink */ DEBUGFS_ADD_COUNTER(tx_filtered, deflink.status_stats.filtered); - if (local->ops->wake_tx_queue) { - DEBUGFS_ADD(aqm); - DEBUGFS_ADD(airtime); - } + DEBUGFS_ADD(aqm); + DEBUGFS_ADD(airtime); if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) @@ -1076,3 +1090,85 @@ void ieee80211_sta_debugfs_remove(struct sta_info *sta) debugfs_remove_recursive(sta->debugfs_dir); sta->debugfs_dir = NULL; } + +#undef DEBUGFS_ADD +#undef DEBUGFS_ADD_COUNTER + +#define DEBUGFS_ADD(name) \ + debugfs_create_file(#name, 0400, \ + link_sta->debugfs_dir, link_sta, &link_sta_ ##name## _ops) +#define DEBUGFS_ADD_COUNTER(name, field) \ + debugfs_create_ulong(#name, 0400, link_sta->debugfs_dir, &link_sta->field) + +void ieee80211_link_sta_debugfs_add(struct link_sta_info *link_sta) +{ + if (WARN_ON(!link_sta->sta->debugfs_dir)) + return; + + /* For non-MLO, leave the files in the main directory. */ + if (link_sta->sta->sta.valid_links) { + char link_dir_name[10]; + + snprintf(link_dir_name, sizeof(link_dir_name), + "link-%d", link_sta->link_id); + + link_sta->debugfs_dir = + debugfs_create_dir(link_dir_name, + link_sta->sta->debugfs_dir); + + DEBUGFS_ADD(addr); + } else { + if (WARN_ON(link_sta != &link_sta->sta->deflink)) + return; + + link_sta->debugfs_dir = link_sta->sta->debugfs_dir; + } + + DEBUGFS_ADD(ht_capa); + DEBUGFS_ADD(vht_capa); + DEBUGFS_ADD(he_capa); + + DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); +} + +void ieee80211_link_sta_debugfs_remove(struct link_sta_info *link_sta) +{ + if (!link_sta->debugfs_dir || !link_sta->sta->debugfs_dir) { + link_sta->debugfs_dir = NULL; + return; + } + + if (link_sta->debugfs_dir == link_sta->sta->debugfs_dir) { + WARN_ON(link_sta != &link_sta->sta->deflink); + link_sta->sta->debugfs_dir = NULL; + return; + } + + debugfs_remove_recursive(link_sta->debugfs_dir); + link_sta->debugfs_dir = NULL; +} + +void ieee80211_link_sta_debugfs_drv_add(struct link_sta_info *link_sta) +{ + if (WARN_ON(!link_sta->debugfs_dir)) + return; + + drv_link_sta_add_debugfs(link_sta->sta->local, link_sta->sta->sdata, + link_sta->pub, link_sta->debugfs_dir); +} + +void ieee80211_link_sta_debugfs_drv_remove(struct link_sta_info *link_sta) +{ + if (!link_sta->debugfs_dir) + return; + + if (WARN_ON(link_sta->debugfs_dir == link_sta->sta->debugfs_dir)) + return; + + /* Recreate the directory excluding the driver data */ + debugfs_remove_recursive(link_sta->debugfs_dir); + link_sta->debugfs_dir = NULL; + + ieee80211_link_sta_debugfs_add(link_sta); +} diff --git a/net/mac80211/debugfs_sta.h b/net/mac80211/debugfs_sta.h index d2e7c27ad6d1..cde8148bdb18 100644 --- a/net/mac80211/debugfs_sta.h +++ b/net/mac80211/debugfs_sta.h @@ -7,9 +7,21 @@ #ifdef CONFIG_MAC80211_DEBUGFS void ieee80211_sta_debugfs_add(struct sta_info *sta); void 
ieee80211_sta_debugfs_remove(struct sta_info *sta); + +void ieee80211_link_sta_debugfs_add(struct link_sta_info *link_sta); +void ieee80211_link_sta_debugfs_remove(struct link_sta_info *link_sta); + +void ieee80211_link_sta_debugfs_drv_add(struct link_sta_info *link_sta); +void ieee80211_link_sta_debugfs_drv_remove(struct link_sta_info *link_sta); #else static inline void ieee80211_sta_debugfs_add(struct sta_info *sta) {} static inline void ieee80211_sta_debugfs_remove(struct sta_info *sta) {} + +static inline void ieee80211_link_sta_debugfs_add(struct link_sta_info *link_sta) {} +static inline void ieee80211_link_sta_debugfs_remove(struct link_sta_info *link_sta) {} + +static inline void ieee80211_link_sta_debugfs_drv_add(struct link_sta_info *link_sta) {} +static inline void ieee80211_link_sta_debugfs_drv_remove(struct link_sta_info *link_sta) {} #endif #endif /* __MAC80211_DEBUGFS_STA_H */ diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 5392ffa18270..d737db4e07e2 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -7,6 +7,7 @@ #include "ieee80211_i.h" #include "trace.h" #include "driver-ops.h" +#include "debugfs_sta.h" int drv_start(struct ieee80211_local *local) { @@ -497,6 +498,11 @@ int drv_change_sta_links(struct ieee80211_local *local, struct ieee80211_sta *sta, u16 old_links, u16 new_links) { + struct sta_info *info = container_of(sta, struct sta_info, sta); + struct link_sta_info *link_sta; + unsigned long links_to_add; + unsigned long links_to_rem; + unsigned int link_id; int ret = -EOPNOTSUPP; might_sleep(); @@ -510,11 +516,30 @@ int drv_change_sta_links(struct ieee80211_local *local, if (old_links == new_links) return 0; + links_to_add = ~old_links & new_links; + links_to_rem = old_links & ~new_links; + + for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) { + link_sta = rcu_dereference_protected(info->link[link_id], + lockdep_is_held(&local->sta_mtx)); + + ieee80211_link_sta_debugfs_drv_remove(link_sta); + } + trace_drv_change_sta_links(local, sdata, sta, old_links, new_links); if (local->ops->change_sta_links) ret = local->ops->change_sta_links(&local->hw, &sdata->vif, sta, old_links, new_links); trace_drv_return_int(local, ret); - return ret; + if (ret) + return ret; + + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { + link_sta = rcu_dereference_protected(info->link[link_id], + lockdep_is_held(&local->sta_mtx)); + ieee80211_link_sta_debugfs_drv_add(link_sta); + } + + return 0; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 81e40b0a3b16..809bad53e15b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -480,6 +480,22 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, local->ops->sta_add_debugfs(&local->hw, &sdata->vif, sta, dir); } + +static inline void drv_link_sta_add_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_sta *link_sta, + struct dentry *dir) +{ + might_sleep(); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + if (local->ops->link_sta_add_debugfs) + local->ops->link_sta_add_debugfs(&local->hw, &sdata->vif, + link_sta, dir); +} #endif static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a842f2e1c230..63ff0d2524b6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -390,6 +390,7 @@ struct 
ieee80211_mgd_auth_data { bool done, waiting; bool peer_confirmed; bool timeout_started; + int link_id; u8 ap_addr[ETH_ALEN] __aligned(2); @@ -412,6 +413,8 @@ struct ieee80211_mgd_assoc_data { u8 *elems; /* pointing to inside ie[] below */ ieee80211_conn_flags_t conn_flags; + + u16 status; } link[IEEE80211_MLD_MAX_NUM_LINKS]; u8 ap_addr[ETH_ALEN] __aligned(2); @@ -1707,6 +1710,17 @@ struct ieee802_11_elems { u8 tx_pwr_env_num; u8 eht_cap_len; + /* multi-link element can be de-fragmented and thus u8 is not sufficient */ + size_t multi_link_len; + + /* + * store the per station profile pointer and length in case that the + * parsing also handled Multi-Link element parsing for a specific link + * ID. + */ + struct ieee80211_mle_per_sta_profile *prof; + size_t sta_prof_len; + /* whether a parse error occurred while retrieving these elements */ bool parse_error; @@ -2205,9 +2219,13 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, * represent a non-transmitting BSS in which case the data * for that non-transmitting BSS is returned * @link_id: the link ID to parse elements for, if a STA profile - * is present in the multi-link element, or -1 to ignore + * is present in the multi-link element, or -1 to ignore; + * note that the code currently assumes parsing an association + * (or re-association) response frame if this is given * @from_ap: frame is received from an AP (currently used only * for EHT capabilities parsing) + * @scratch_len: if non-zero, specifies the requested length of the scratch + * buffer; otherwise, 'len' is used. */ struct ieee80211_elems_parse_params { const u8 *start; @@ -2218,6 +2236,7 @@ struct ieee80211_elems_parse_params { struct cfg80211_bss *bss; int link_id; bool from_ap; + size_t scratch_len; }; struct ieee802_11_elems * @@ -2288,7 +2307,6 @@ void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason, bool refcounted); -void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue); void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); void ieee80211_add_pending_skbs(struct ieee80211_local *local, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index dd9ac1f7d2ea..7c4ce716c939 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -458,12 +458,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (cancel_scan) ieee80211_scan_cancel(local); - /* - * Stop TX on this interface first. 
- */ - if (!local->ops->wake_tx_queue && sdata->dev) - netif_tx_stop_all_queues(sdata->dev); - ieee80211_roc_purge(local, sdata); switch (sdata->vif.type) { @@ -811,13 +805,6 @@ static void ieee80211_uninit(struct net_device *dev) ieee80211_teardown_sdata(IEEE80211_DEV_TO_SUB_IF(dev)); } -static u16 ieee80211_netdev_select_queue(struct net_device *dev, - struct sk_buff *skb, - struct net_device *sb_dev) -{ - return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); -} - static void ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -831,7 +818,6 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_start_xmit = ieee80211_subif_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, - .ndo_select_queue = ieee80211_netdev_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, }; @@ -939,7 +925,6 @@ static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_start_xmit = ieee80211_subif_start_xmit_8023, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, - .ndo_select_queue = ieee80211_netdev_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, .ndo_fill_forward_path = ieee80211_netdev_fill_forward_path, }; @@ -1441,35 +1426,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local); - if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - local->ops->wake_tx_queue) { - /* XXX: for AP_VLAN, actually track AP queues */ - if (dev) - netif_tx_start_all_queues(dev); - } else if (dev) { - unsigned long flags; - int n_acs = IEEE80211_NUM_ACS; - int ac; - - if (local->hw.queues < IEEE80211_NUM_ACS) - n_acs = 1; - - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - if (sdata->vif.cab_queue == IEEE80211_INVAL_HW_QUEUE || - (local->queue_stop_reasons[sdata->vif.cab_queue] == 0 && - skb_queue_empty(&local->pending[sdata->vif.cab_queue]))) { - for (ac = 0; ac < n_acs; ac++) { - int ac_queue = sdata->vif.hw_queue[ac]; - - if (local->queue_stop_reasons[ac_queue] == 0 && - skb_queue_empty(&local->pending[ac_queue])) - netif_start_subqueue(dev, ac); - } - } - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } - set_bit(SDATA_STATE_RUNNING, &sdata->state); return 0; @@ -1499,17 +1455,12 @@ static void ieee80211_if_setup(struct net_device *dev) { ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &ieee80211_dataif_ops; dev->needs_free_netdev = true; dev->priv_destructor = ieee80211_if_free; } -static void ieee80211_if_setup_no_queue(struct net_device *dev) -{ - ieee80211_if_setup(dev); - dev->priv_flags |= IFF_NO_QUEUE; -} - static void ieee80211_iface_process_skb(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) @@ -2094,9 +2045,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct net_device *ndev = NULL; struct ieee80211_sub_if_data *sdata = NULL; struct txq_info *txqi; - void (*if_setup)(struct net_device *dev); int ret, i; - int txqs = 1; ASSERT_RTNL(); @@ -2119,30 +2068,18 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sizeof(void *)); int txq_size = 0; - if (local->ops->wake_tx_queue && - type != NL80211_IFTYPE_AP_VLAN && + if (type != NL80211_IFTYPE_AP_VLAN && (type != NL80211_IFTYPE_MONITOR || (params->flags & MONITOR_FLAG_ACTIVE))) txq_size += sizeof(struct txq_info) + 
local->hw.txq_data_size; - if (local->ops->wake_tx_queue) { - if_setup = ieee80211_if_setup_no_queue; - } else { - if_setup = ieee80211_if_setup; - if (local->hw.queues >= IEEE80211_NUM_ACS) - txqs = IEEE80211_NUM_ACS; - } - ndev = alloc_netdev_mqs(size + txq_size, name, name_assign_type, - if_setup, txqs, 1); + ieee80211_if_setup, 1, 1); if (!ndev) return -ENOMEM; - if (!local->ops->wake_tx_queue && local->hw.wiphy->tx_queue_len) - ndev->tx_queue_len = local->hw.wiphy->tx_queue_len; - dev_net_set(ndev, wiphy_net(local->hw.wiphy)); ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index e309708abae8..d1f5a9f7c647 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -357,6 +357,11 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata) continue; + + /* this is very temporary, but do it anyway */ + __ieee80211_sta_recalc_aggregates(sta, + old_active | active_links); + ret = drv_change_sta_links(local, sdata, &sta->sta, old_active, old_active | active_links); @@ -369,10 +374,22 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata) continue; + + __ieee80211_sta_recalc_aggregates(sta, active_links); + ret = drv_change_sta_links(local, sdata, &sta->sta, old_active | active_links, active_links); WARN_ON_ONCE(ret); + + /* + * Do it again, just in case - the driver might very + * well have called ieee80211_sta_recalc_aggregates() + * from there when filling in the new links, which + * would set it wrong since the vif's active links are + * not switched yet... + */ + __ieee80211_sta_recalc_aggregates(sta, active_links); } for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 46f3eddc2388..b7279d88cddb 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -630,7 +630,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, if (WARN_ON(!ops->tx || !ops->start || !ops->stop || !ops->config || !ops->add_interface || !ops->remove_interface || - !ops->configure_filter)) + !ops->configure_filter || !ops->wake_tx_queue)) return NULL; if (WARN_ON(ops->sta_state && (ops->sta_add || ops->sta_remove))) @@ -719,9 +719,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, if (!ops->set_key) wiphy->flags |= WIPHY_FLAG_IBSS_RSN; - if (ops->wake_tx_queue) - wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_TXQS); - + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_TXQS); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM); wiphy->bss_priv_size = sizeof(struct ieee80211_bss); @@ -834,10 +832,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, atomic_set(&local->agg_queue_stop[i], 0); } tasklet_setup(&local->tx_pending_tasklet, ieee80211_tx_pending); - - if (ops->wake_tx_queue) - tasklet_setup(&local->wake_txqs_tasklet, ieee80211_wake_txqs); - + tasklet_setup(&local->wake_txqs_tasklet, ieee80211_wake_txqs); tasklet_setup(&local->tasklet, ieee80211_tasklet_handler); skb_queue_head_init(&local->skb_queue); @@ -1087,6 +1082,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) channels += sband->n_channels; + /* + * Due to the way the aggregation code handles this and it + * being an HT capability, we can't really support delayed + * BA in MLO (yet). 
+ */ + if (WARN_ON(sband->ht_cap.ht_supported && + (sband->ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA) && + hw->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) + return -EINVAL; + if (max_bitrates < sband->n_bitrates) max_bitrates = sband->n_bitrates; supp_ht = supp_ht || sband->ht_cap.ht_supported; @@ -1155,6 +1160,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!local->int_scan_req) return -ENOMEM; + eth_broadcast_addr(local->int_scan_req->bssid); + for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!local->hw.wiphy->bands[band]) continue; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d8484cd870de..a804e0220ed7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2717,18 +2717,10 @@ static u32 ieee80211_link_set_associated(struct ieee80211_link_data *link, } if (link->u.mgd.have_beacon) { - /* - * If the AP is buggy we may get here with no DTIM period - * known, so assume it's 1 which is the only safe assumption - * in that case, although if the TIM IE is broken powersave - * probably just won't work at all. - */ - bss_conf->dtim_period = link->u.mgd.dtim_period ?: 1; bss_conf->beacon_rate = bss->beacon_rate; changed |= BSS_CHANGED_BEACON_INFO; } else { bss_conf->beacon_rate = NULL; - bss_conf->dtim_period = 0; } /* Tell the driver to monitor connection quality (if supported) */ @@ -2754,7 +2746,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss = assoc_data->link[link_id].bss; struct ieee80211_link_data *link; - if (!cbss) + if (!cbss || + assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) continue; link = sdata_dereference(sdata->link[link_id], sdata); @@ -2782,7 +2775,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link; struct cfg80211_bss *cbss = assoc_data->link[link_id].bss; - if (!cbss) + if (!cbss || + assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) continue; link = sdata_dereference(sdata->link[link_id], sdata); @@ -3868,9 +3862,15 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, } } -static bool ieee80211_twt_req_supported(const struct link_sta_info *link_sta, +static bool ieee80211_twt_req_supported(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct link_sta_info *link_sta, const struct ieee802_11_elems *elems) { + const struct ieee80211_sta_he_cap *own_he_cap = + ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); + if (elems->ext_capab_len < 10) return false; @@ -3878,14 +3878,19 @@ static bool ieee80211_twt_req_supported(const struct link_sta_info *link_sta, return false; return link_sta->pub->he_cap.he_cap_elem.mac_cap_info[0] & - IEEE80211_HE_MAC_CAP0_TWT_RES; + IEEE80211_HE_MAC_CAP0_TWT_RES && + own_he_cap && + (own_he_cap->he_cap_elem.mac_cap_info[0] & + IEEE80211_HE_MAC_CAP0_TWT_REQ); } -static int ieee80211_recalc_twt_req(struct ieee80211_link_data *link, +static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + struct ieee80211_link_data *link, struct link_sta_info *link_sta, struct ieee802_11_elems *elems) { - bool twt = ieee80211_twt_req_supported(link_sta, elems); + bool twt = ieee80211_twt_req_supported(sdata, sband, link_sta, elems); if (link->conf->twt_requester != twt) { link->conf->twt_requester = twt; @@ -3923,11 +3928,12 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, struct ieee80211_mgd_assoc_data *assoc_data = 
sdata->u.mgd.assoc_data; struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_local *local = sdata->local; + unsigned int link_id = link->link_id; struct ieee80211_elems_parse_params parse_params = { .start = elem_start, .len = elem_len, .bss = cbss, - .link_id = link == &sdata->deflink ? -1 : link->link_id, + .link_id = link_id == assoc_data->assoc_link_id ? -1 : link_id, .from_ap = true, }; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; @@ -3942,8 +3948,35 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, if (!elems) return false; - /* FIXME: use from STA profile element after parsing that */ - capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); + if (link_id == assoc_data->assoc_link_id) { + capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); + + /* + * we should not get to this flow unless the association was + * successful, so set the status directly to success + */ + assoc_data->link[link_id].status = WLAN_STATUS_SUCCESS; + } else if (!elems->prof) { + ret = false; + goto out; + } else { + const u8 *ptr = elems->prof->variable + + elems->prof->sta_info_len - 1; + + /* + * During parsing, we validated that these fields exist, + * otherwise elems->prof would have been set to NULL. + */ + capab_info = get_unaligned_le16(ptr); + assoc_data->link[link_id].status = get_unaligned_le16(ptr + 2); + + if (assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) { + link_info(link, "association response status code=%u\n", + assoc_data->link[link_id].status); + ret = true; + goto out; + } + } if (!is_s1g && !elems->supp_rates) { sdata_info(sdata, "no SuppRates element in AssocResp\n"); @@ -4099,7 +4132,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, else bss_conf->twt_protected = false; - *changed |= ieee80211_recalc_twt_req(link, link_sta, elems); + *changed |= ieee80211_recalc_twt_req(sdata, sband, link, + link_sta, elems); if (elems->eht_operation && elems->eht_cap && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) { @@ -4864,6 +4898,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, unsigned int link_id; struct sta_info *sta; u64 changed[IEEE80211_MLD_MAX_NUM_LINKS] = {}; + u16 valid_links = 0; int err; mutex_lock(&sdata->local->sta_mtx); @@ -4876,8 +4911,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, goto out_err; if (sdata->vif.valid_links) { - u16 valid_links = 0; - for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { if (!assoc_data->link[link_id].bss) continue; @@ -4894,10 +4927,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, } for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + struct cfg80211_bss *cbss = assoc_data->link[link_id].bss; struct ieee80211_link_data *link; struct link_sta_info *link_sta; - if (!assoc_data->link[link_id].bss) + if (!cbss) continue; link = sdata_dereference(sdata->link[link_id], sdata); @@ -4906,28 +4940,36 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (sdata->vif.valid_links) link_info(link, - "local address %pM, AP link address %pM\n", + "local address %pM, AP link address %pM%s\n", link->conf->addr, - assoc_data->link[link_id].bss->bssid); + assoc_data->link[link_id].bss->bssid, + link_id == assoc_data->assoc_link_id ? 
+ " (assoc)" : ""); link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&local->sta_mtx)); if (WARN_ON(!link_sta)) goto out_err; - if (link_id != assoc_data->assoc_link_id) { - struct cfg80211_bss *cbss = assoc_data->link[link_id].bss; + if (!link->u.mgd.have_beacon) { const struct cfg80211_bss_ies *ies; rcu_read_lock(); - ies = rcu_dereference(cbss->ies); + ies = rcu_dereference(cbss->beacon_ies); + if (ies) + link->u.mgd.have_beacon = true; + else + ies = rcu_dereference(cbss->ies); ieee80211_get_dtim(ies, &link->conf->sync_dtim_count, &link->u.mgd.dtim_period); - link->conf->dtim_period = link->u.mgd.dtim_period ?: 1; link->conf->beacon_int = cbss->beacon_interval; rcu_read_unlock(); + } + + link->conf->dtim_period = link->u.mgd.dtim_period ?: 1; + if (link_id != assoc_data->assoc_link_id) { err = ieee80211_prep_channel(sdata, link, cbss, &link->u.mgd.conn_flags); if (err) { @@ -4947,6 +4989,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, &changed[link_id])) goto out_err; + if (assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) { + valid_links &= ~BIT(link_id); + ieee80211_sta_remove_link(sta, link_id); + continue; + } + if (link_id != assoc_data->assoc_link_id) { err = ieee80211_sta_activate_link(sta, link_id); if (err) @@ -4954,6 +5002,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, } } + /* links might have changed due to rejected ones, set them again */ + ieee80211_vif_set_links(sdata, valid_links); + rate_control_rate_init(sta); if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) { @@ -5033,6 +5084,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct cfg80211_rx_assoc_resp resp = { .uapsd_queues = -1, }; + u8 ap_mld_addr[ETH_ALEN] __aligned(2); unsigned int link_id; sdata_assert_lock(sdata); @@ -5187,10 +5239,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; + if (!assoc_data->link[link_id].bss) continue; + resp.links[link_id].bss = assoc_data->link[link_id].bss; resp.links[link_id].addr = link->conf->addr; + resp.links[link_id].status = assoc_data->link[link_id].status; /* get uapsd queues configuration - same for all links */ resp.uapsd_queues = 0; @@ -5199,6 +5254,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, resp.uapsd_queues |= ieee80211_ac_to_qos_mask[ac]; } + if (sdata->vif.valid_links) { + ether_addr_copy(ap_mld_addr, sdata->vif.cfg.ap_addr); + resp.ap_mld_addr = ap_mld_addr; + } + ieee80211_destroy_assoc_data(sdata, status_code == WLAN_STATUS_SUCCESS ? 
ASSOC_SUCCESS : @@ -5208,8 +5268,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, resp.len = len; resp.req_ies = ifmgd->assoc_req_ies; resp.req_ies_len = ifmgd->assoc_req_ies_len; - if (sdata->vif.valid_links) - resp.ap_mld_addr = sdata->vif.cfg.ap_addr; cfg80211_rx_assoc_resp(sdata->dev, &resp); notify_driver: drv_mgd_complete_tx(sdata->local, sdata, &info); @@ -5432,6 +5490,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee802_11_elems *elems; struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; struct link_sta_info *link_sta; struct sta_info *sta; @@ -5694,7 +5753,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, goto free; } - changed |= ieee80211_recalc_twt_req(link, link_sta, elems); + if (WARN_ON(!link->conf->chandef.chan)) + goto free; + + sband = local->hw.wiphy->bands[link->conf->chandef.chan->band]; + + changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems); if (ieee80211_config_bw(link, elems->ht_cap_elem, elems->vht_cap_elem, elems->ht_operation, @@ -6640,6 +6704,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, req->ap_mld_addr ?: req->bss->bssid, ETH_ALEN); auth_data->bss = req->bss; + auth_data->link_id = req->link_id; if (req->auth_data_len >= 4) { if (req->auth_type == NL80211_AUTHTYPE_SAE) { @@ -6658,7 +6723,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, * removal and re-addition of the STA entry in * ieee80211_prep_connection(). */ - cont_auth = ifmgd->auth_data && req->bss == ifmgd->auth_data->bss; + cont_auth = ifmgd->auth_data && req->bss == ifmgd->auth_data->bss && + ifmgd->auth_data->link_id == req->link_id; if (req->ie && req->ie_len) { memcpy(&auth_data->data[auth_data->data_len], @@ -6982,7 +7048,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, /* keep sta info, bssid if matching */ match = ether_addr_equal(ifmgd->auth_data->ap_addr, - assoc_data->ap_addr); + assoc_data->ap_addr) && + ifmgd->auth_data->link_id == req->link_id; ieee80211_destroy_auth_data(sdata, match); } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 7f3f5f51081d..762346598338 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1963,9 +1963,6 @@ minstrel_ht_alloc(struct ieee80211_hw *hw) /* safe default, does not necessarily have to match hw properties */ mp->max_retry = 7; - if (hw->max_rates >= 4) - mp->has_mrr = true; - mp->hw = hw; mp->update_interval = HZ / 20; @@ -2036,7 +2033,7 @@ static void __init init_sample_table(void) memset(sample_table, 0xff, sizeof(sample_table)); for (col = 0; col < SAMPLE_COLUMNS; col++) { - prandom_bytes(rnd, sizeof(rnd)); + get_random_bytes(rnd, sizeof(rnd)); for (i = 0; i < MCS_GROUP_RATES; i++) { new_idx = (i + rnd[i]) % MCS_GROUP_RATES; while (sample_table[col][new_idx] != 0xff) diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 1766ff0c78d3..4be0401f7721 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -74,7 +74,6 @@ struct minstrel_priv { struct ieee80211_hw *hw; - bool has_mrr; unsigned int cw_min; unsigned int cw_max; unsigned int max_retry; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f99416d2e144..c28c6fbf786e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1571,9 +1571,6 @@ static void 
sta_ps_start(struct sta_info *sta) ieee80211_clear_fast_xmit(sta); - if (!sta->sta.txq[0]) - return; - for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi = to_txq_info(txq); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0e8c4f48c36d..dc3cdee51e66 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -641,7 +641,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, if (flags & IEEE80211_PROBE_FLAG_RANDOM_SN) { struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - u16 sn = get_random_u32(); + u16 sn = get_random_u16(); info->control.flags |= IEEE80211_TX_CTRL_NO_SEQNO; hdr->seq_ctrl = diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cebfd148bb40..04e0f132b1d9 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -140,17 +140,15 @@ static void __cleanup_single_sta(struct sta_info *sta) atomic_dec(&ps->num_sta_ps); } - if (sta->sta.txq[0]) { - for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - struct txq_info *txqi; + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txqi; - if (!sta->sta.txq[i]) - continue; + if (!sta->sta.txq[i]) + continue; - txqi = to_txq_info(sta->sta.txq[i]); + txqi = to_txq_info(sta->sta.txq[i]); - ieee80211_txq_purge(local, txqi); - } + ieee80211_txq_purge(local, txqi); } for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { @@ -366,6 +364,9 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id, if (unhash) link_sta_info_hash_del(sta->local, link_sta); + if (test_sta_flag(sta, WLAN_STA_INSERTED)) + ieee80211_link_sta_debugfs_remove(link_sta); + if (link_sta != &sta->deflink) alloc = container_of(link_sta, typeof(*alloc), info); @@ -425,8 +426,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); - if (sta->sta.txq[0]) - kfree(to_txq_info(sta->sta.txq[0])); + kfree(to_txq_info(sta->sta.txq[0])); kfree(rcu_dereference_raw(sta->sta.rates)); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); @@ -511,6 +511,7 @@ static void sta_info_add_link(struct sta_info *sta, link_info->sta = sta; link_info->link_id = link_id; link_info->pub = link_sta; + link_info->pub->sta = &sta->sta; link_sta->link_id = link_id; rcu_assign_pointer(sta->link[link_id], link_info); rcu_assign_pointer(sta->sta.link[link_id], link_sta); @@ -527,6 +528,8 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_hw *hw = &local->hw; struct sta_info *sta; + void *txq_data; + int size; int i; sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); @@ -596,21 +599,18 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->last_connected = ktime_get_seconds(); - if (local->ops->wake_tx_queue) { - void *txq_data; - int size = sizeof(struct txq_info) + - ALIGN(hw->txq_data_size, sizeof(void *)); + size = sizeof(struct txq_info) + + ALIGN(hw->txq_data_size, sizeof(void *)); - txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp); - if (!txq_data) - goto free; + txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp); + if (!txq_data) + goto free; - for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - struct txq_info *txq = txq_data + i * size; + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txq = txq_data + i * size; - /* might not do anything for the bufferable MMPDU TXQ */ - ieee80211_txq_init(sdata, sta, txq, i); - } 
+ /* might not do anything for the (bufferable) MMPDU TXQ */ + ieee80211_txq_init(sdata, sta, txq, i); } if (sta_prepare_rate_control(local, sta, gfp)) @@ -684,8 +684,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, return sta; free_txq: - if (sta->sta.txq[0]) - kfree(to_txq_info(sta->sta.txq[0])); + kfree(to_txq_info(sta->sta.txq[0])); free: sta_info_free_link(&sta->deflink); #ifdef CONFIG_MAC80211_MESH @@ -874,6 +873,26 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); + if (sta->sta.valid_links) { + int i; + + for (i = 0; i < ARRAY_SIZE(sta->link); i++) { + struct link_sta_info *link_sta; + + link_sta = rcu_dereference_protected(sta->link[i], + lockdep_is_held(&local->sta_mtx)); + + if (!link_sta) + continue; + + ieee80211_link_sta_debugfs_add(link_sta); + if (sdata->vif.active_links & BIT(i)) + ieee80211_link_sta_debugfs_drv_add(link_sta); + } + } else { + ieee80211_link_sta_debugfs_add(&sta->deflink); + ieee80211_link_sta_debugfs_drv_add(&sta->deflink); + } sinfo->generation = local->sta_generation; cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); @@ -1958,9 +1977,6 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * TIM recalculation. */ - if (!sta->sta.txq[0]) - return; - for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { if (!sta->sta.txq[tid] || !(driver_release_tids & BIT(tid)) || @@ -2127,22 +2143,30 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, } EXPORT_SYMBOL(ieee80211_sta_register_airtime); -void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta) +void __ieee80211_sta_recalc_aggregates(struct sta_info *sta, u16 active_links) { - struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - struct ieee80211_link_sta *link_sta; - int link_id, i; bool first = true; + int link_id; - if (!pubsta->valid_links || !pubsta->mlo) { - pubsta->cur = &pubsta->deflink.agg; + if (!sta->sta.valid_links || !sta->sta.mlo) { + sta->sta.cur = &sta->sta.deflink.agg; return; } rcu_read_lock(); - for_each_sta_active_link(&sta->sdata->vif, pubsta, link_sta, link_id) { + for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) { + struct ieee80211_link_sta *link_sta; + int i; + + if (!(active_links & BIT(link_id))) + continue; + + link_sta = rcu_dereference(sta->sta.link[link_id]); + if (!link_sta) + continue; + if (first) { - sta->cur = pubsta->deflink.agg; + sta->cur = sta->sta.deflink.agg; first = false; continue; } @@ -2161,7 +2185,14 @@ void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta) } rcu_read_unlock(); - pubsta->cur = &sta->cur; + sta->sta.cur = &sta->cur; +} + +void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + + __ieee80211_sta_recalc_aggregates(sta, sta->sdata->vif.active_links); } EXPORT_SYMBOL(ieee80211_sta_recalc_aggregates); @@ -2396,9 +2427,9 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, u64 value; do { - start = u64_stats_fetch_begin_irq(&rxstats->syncp); + start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->msdu[tid]; - } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } @@ -2445,7 +2476,7 @@ static void sta_set_tidstats(struct sta_info *sta, tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid]; } - if (local->ops->wake_tx_queue && tid < 
IEEE80211_NUM_TIDS) { + if (tid < IEEE80211_NUM_TIDS) { spin_lock_bh(&local->fq.lock); rcu_read_lock(); @@ -2464,9 +2495,9 @@ static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) u64 value; do { - start = u64_stats_fetch_begin_irq(&rxstats->syncp); + start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->bytes; - } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } @@ -2773,9 +2804,6 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) static void sta_update_codel_params(struct sta_info *sta, u32 thr) { - if (!sta->sdata->local->ops->wake_tx_queue) - return; - if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { sta->cparams.target = MS2TIME(50); sta->cparams.interval = MS2TIME(300); @@ -2823,6 +2851,8 @@ int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) sta_info_add_link(sta, link_id, &alloc->info, &alloc->sta); + ieee80211_link_sta_debugfs_add(&alloc->info); + return 0; } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2517ea714dc4..69820b551668 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -513,6 +513,7 @@ struct ieee80211_fragment_cache { * @status_stats.avg_ack_signal: average ACK signal * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification + * @debugfs_dir: debug filesystem directory dentry * @pub: public (driver visible) link STA data * TODO Move other link params from sta_info as required for MLD operation */ @@ -560,6 +561,10 @@ struct link_sta_info { enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *debugfs_dir; +#endif + struct ieee80211_link_sta *pub; }; @@ -922,6 +927,8 @@ void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta, const u8 *ext_capab, unsigned int ext_capab_len); +void __ieee80211_sta_recalc_aggregates(struct sta_info *sta, u16 active_links); + enum sta_stats_type { STA_STATS_RATE_TYPE_INVALID = 0, STA_STATS_RATE_TYPE_LEGACY, diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index f4b4d25eef95..b255f3b5bf01 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1016,7 +1016,6 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, skb->priority = 256 + 5; break; } - skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb)); /* * Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress. 
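Editor's aside before the tx.c and util.c hunks: together with the main.c change above that makes the wake_tx_queue op mandatory at registration, the util.c hunk below adds a generic pull-based handler (ieee80211_handle_wake_tx_queue) that legacy drivers can plug in directly. A minimal, hypothetical migration sketch in C; the my_drv names are invented and only the two ops shown relate to this series:

#include <net/mac80211.h>

/* Hypothetical driver, for illustration only: it pushes frames from .tx
 * and lets mac80211's generic handler drive the per-STA/vif TXQs. */
static void my_drv_tx(struct ieee80211_hw *hw,
		      struct ieee80211_tx_control *control,
		      struct sk_buff *skb)
{
	/* hand the skb to the device's TX path here */
}

static const struct ieee80211_ops my_drv_ops = {
	.tx		= my_drv_tx,
	.wake_tx_queue	= ieee80211_handle_wake_tx_queue,
	/* .start, .stop, .config, .add_interface, .remove_interface,
	 * .configure_filter, ... unchanged from the existing driver */
};

Since ieee80211_alloc_hw_nm() now refuses an ops struct without wake_tx_queue (see the main.c hunk above), assigning the generic handler is the smallest plausible conversion for a driver that never implemented its own iTXQ scheduling.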
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a364148149f9..bb2e54610101 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1599,9 +1599,6 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) bool supp_vht = false; enum nl80211_band band; - if (!local->ops->wake_tx_queue) - return 0; - ret = fq_init(fq, 4096); if (ret) return ret; @@ -1649,9 +1646,6 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local) { struct fq *fq = &local->fq; - if (!local->ops->wake_tx_queue) - return; - kfree(local->cvars); local->cvars = NULL; @@ -1668,8 +1662,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, struct ieee80211_vif *vif; struct txq_info *txqi; - if (!local->ops->wake_tx_queue || - sdata->vif.type == NL80211_IFTYPE_MONITOR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) return false; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -2973,7 +2966,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, if (pre_conf_link_id != link_id && link_id != IEEE80211_LINK_UNSPECIFIED) { -#ifdef CPTCFG_MAC80211_VERBOSE_DEBUG +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG net_info_ratelimited("%s: dropped frame to %pM with bad link ID request (%d vs. %d)\n", sdata->name, hdr.addr1, pre_conf_link_id, link_id); @@ -4184,12 +4177,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, if (IS_ERR(sta)) sta = NULL; - if (local->ops->wake_tx_queue) { - u16 queue = __ieee80211_select_queue(sdata, sta, skb); - skb_set_queue_mapping(skb, queue); - skb_get_hash(skb); - } - + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, sta, skb)); ieee80211_aggr_check(sdata, sta, skb); sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); @@ -4495,11 +4483,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_tx *tid_tx; u8 tid; - if (local->ops->wake_tx_queue) { - u16 queue = __ieee80211_select_queue(sdata, sta, skb); - skb_set_queue_mapping(skb, queue); - skb_get_hash(skb); - } + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, sta, skb)); if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)) && test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) @@ -4753,9 +4737,6 @@ void ieee80211_tx_pending(struct tasklet_struct *t) if (!txok) break; } - - if (skb_queue_empty(&local->pending[i])) - ieee80211_propagate_queue_wake(local, i); } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -5948,10 +5929,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, } if (!IS_ERR(sta)) { - u16 queue = __ieee80211_select_queue(sdata, sta, skb); + u16 queue = ieee80211_select_queue(sdata, sta, skb); skb_set_queue_mapping(skb, queue); - skb_get_hash(skb); /* * for MLO STA, the SA should be the AP MLD address, but diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b512cb37aafb..6f5407038459 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -288,6 +288,52 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_ctstoself_duration); +static void wake_tx_push_queue(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_txq *queue) +{ + int q = sdata->vif.hw_queue[queue->ac]; + struct ieee80211_tx_control control = { + .sta = queue->sta, + }; + struct sk_buff *skb; + unsigned long flags; + bool q_stopped; + + while (1) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + q_stopped = local->queue_stop_reasons[q]; + spin_unlock_irqrestore(&local->queue_stop_reason_lock, 
flags); + + if (q_stopped) + break; + + skb = ieee80211_tx_dequeue(&local->hw, queue); + if (!skb) + break; + + drv_tx(local, &control, skb); + } +} + +/* wake_tx_queue handler for drivers not implementing a custom one */ +void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif); + struct ieee80211_txq *queue; + + /* Use ieee80211_next_txq() for airtime fairness accounting */ + ieee80211_txq_schedule_start(hw, txq->ac); + while ((queue = ieee80211_next_txq(hw, txq->ac))) { + wake_tx_push_queue(local, sdata, queue); + ieee80211_return_txq(hw, queue, false); + } + ieee80211_txq_schedule_end(hw, txq->ac); +} +EXPORT_SYMBOL(ieee80211_handle_wake_tx_queue); + static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) { struct ieee80211_local *local = sdata->local; @@ -400,39 +446,6 @@ void ieee80211_wake_txqs(struct tasklet_struct *t) spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } -void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue) -{ - struct ieee80211_sub_if_data *sdata; - int n_acs = IEEE80211_NUM_ACS; - - if (local->ops->wake_tx_queue) - return; - - if (local->hw.queues < IEEE80211_NUM_ACS) - n_acs = 1; - - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - int ac; - - if (!sdata->dev) - continue; - - if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE && - local->queue_stop_reasons[sdata->vif.cab_queue] != 0) - continue; - - for (ac = 0; ac < n_acs; ac++) { - int ac_queue = sdata->vif.hw_queue[ac]; - - if (ac_queue == queue || - (sdata->vif.cab_queue == queue && - local->queue_stop_reasons[ac_queue] == 0 && - skb_queue_empty(&local->pending[ac_queue]))) - netif_wake_subqueue(sdata->dev, ac); - } - } -} - static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason, bool refcounted, @@ -463,11 +476,7 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, /* someone still has this queue stopped */ return; - if (skb_queue_empty(&local->pending[queue])) { - rcu_read_lock(); - ieee80211_propagate_queue_wake(local, queue); - rcu_read_unlock(); - } else + if (!skb_queue_empty(&local->pending[queue])) tasklet_schedule(&local->tx_pending_tasklet); /* @@ -477,12 +486,10 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, * release someone's lock, but it is fine because all the callers of * __ieee80211_wake_queue call it right before releasing the lock. 
*/ - if (local->ops->wake_tx_queue) { - if (reason == IEEE80211_QUEUE_STOP_REASON_DRIVER) - tasklet_schedule(&local->wake_txqs_tasklet); - else - _ieee80211_wake_txqs(local, flags); - } + if (reason == IEEE80211_QUEUE_STOP_REASON_DRIVER) + tasklet_schedule(&local->wake_txqs_tasklet); + else + _ieee80211_wake_txqs(local, flags); } void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, @@ -539,10 +546,6 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, for (ac = 0; ac < n_acs; ac++) { if (sdata->vif.hw_queue[ac] == queue || sdata->vif.cab_queue == queue) { - if (!local->ops->wake_tx_queue) { - netif_stop_subqueue(sdata->dev, ac); - continue; - } spin_lock(&local->fq.lock); sdata->vif.txqs_stopped[ac] = true; spin_unlock(&local->fq.lock); @@ -1026,8 +1029,10 @@ ieee80211_parse_extension_element(u32 *crc, elems->eht_operation = data; break; case WLAN_EID_EXT_EHT_MULTI_LINK: - if (ieee80211_mle_size_ok(data, len)) + if (ieee80211_mle_size_ok(data, len)) { elems->multi_link = (void *)data; + elems->multi_link_len = len; + } break; } } @@ -1499,6 +1504,145 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, return found ? profile_len : 0; } +static void ieee80211_defragment_element(struct ieee802_11_elems *elems, + void **elem_ptr, size_t *len, + size_t total_len, u8 frag_id) +{ + u8 *data = *elem_ptr, *pos, *start; + const struct element *elem; + + /* + * Since 'data' points to the data of the element, not the element + * itself, allow 254 in case it was an extended element where the + * extended ID isn't part of the data we see here and thus not part of + * 'len' either. + */ + if (!data || (*len != 254 && *len != 255)) + return; + + start = elems->scratch_pos; + + if (WARN_ON(*len > (elems->scratch + elems->scratch_len - + elems->scratch_pos))) + return; + + memcpy(elems->scratch_pos, data, *len); + elems->scratch_pos += *len; + + pos = data + *len; + total_len -= *len; + for_each_element(elem, pos, total_len) { + if (elem->id != frag_id) + break; + + if (WARN_ON(elem->datalen > + (elems->scratch + elems->scratch_len - + elems->scratch_pos))) + return; + + memcpy(elems->scratch_pos, elem->data, elem->datalen); + elems->scratch_pos += elem->datalen; + + *len += elem->datalen; + } + + *elem_ptr = start; +} + +static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, + u8 link_id) +{ + const struct ieee80211_multi_link_elem *ml = elems->multi_link; + size_t ml_len = elems->multi_link_len; + const struct element *sub; + + if (!ml || !ml_len) + return; + + if (le16_get_bits(ml->control, IEEE80211_ML_CONTROL_TYPE) != + IEEE80211_ML_CONTROL_TYPE_BASIC) + return; + + for_each_mle_subelement(sub, (u8 *)ml, ml_len) { + struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; + u16 control; + + if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE) + continue; + + if (!ieee80211_mle_sta_prof_size_ok(sub->data, sub->datalen)) + return; + + control = le16_to_cpu(prof->control); + + if (link_id != u16_get_bits(control, + IEEE80211_MLE_STA_CONTROL_LINK_ID)) + continue; + + if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE)) + return; + + elems->prof = prof; + elems->sta_prof_len = sub->datalen; + + /* the sub element can be fragmented */ + ieee80211_defragment_element(elems, (void **)&elems->prof, + &elems->sta_prof_len, + ml_len - (sub->data - (u8 *)ml), + IEEE80211_MLE_SUBELEM_FRAGMENT); + return; + } +} + +static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, + struct ieee80211_elems_parse_params 
*params) +{ + struct ieee80211_mle_per_sta_profile *prof; + struct ieee80211_elems_parse_params sub = { + .action = params->action, + .from_ap = params->from_ap, + .link_id = -1, + }; + const struct element *non_inherit = NULL; + const u8 *end; + + if (params->link_id == -1) + return; + + ieee80211_defragment_element(elems, (void **)&elems->multi_link, + &elems->multi_link_len, + elems->total_len - ((u8 *)elems->multi_link - + elems->ie_start), + WLAN_EID_FRAGMENT); + + ieee80211_mle_get_sta_prof(elems, params->link_id); + prof = elems->prof; + + if (!prof) + return; + + /* check if we have the 4 bytes for the fixed part in assoc response */ + if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) { + elems->prof = NULL; + elems->sta_prof_len = 0; + return; + } + + /* + * Skip the capability information and the status code that are expected + * as part of the station profile in association response frames. Note + * the -1 is because the 'sta_info_len' is accounted as part of the + * per-STA profile, but not part of the 'u8 variable[]' portion. + */ + sub.start = prof->variable + prof->sta_info_len - 1 + 4; + end = (const u8 *)prof + elems->sta_prof_len; + sub.len = end - sub.start; + + non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub.start, sub.len); + _ieee802_11_parse_elems_full(&sub, elems, non_inherit); +} + struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) { @@ -1506,7 +1650,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) const struct element *non_inherit = NULL; u8 *nontransmitted_profile; int nontransmitted_profile_len = 0; - size_t scratch_len = params->len; + size_t scratch_len = params->scratch_len ?: 3 * params->len; elems = kzalloc(sizeof(*elems) + scratch_len, GFP_ATOMIC); if (!elems) @@ -1541,6 +1685,8 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) _ieee802_11_parse_elems_full(&sub, elems, NULL); } + ieee80211_mle_parse_link(elems, params); + if (elems->tim && !elems->parse_error) { const struct ieee80211_tim_ie *tim_ie = elems->tim; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index ecc1de2e68a5..a12c63638680 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -122,6 +122,9 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u8 *p; + /* Ensure hash is set prior to potential SW encryption */ + skb_get_hash(skb); + if ((info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER) || local->hw.queues < IEEE80211_NUM_ACS) return 0; @@ -141,12 +144,15 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, return ieee80211_downgrade_queue(sdata, NULL, skb); } -u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, struct sk_buff *skb) +u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) { struct mac80211_qos_map *qos_map; bool qos; + /* Ensure hash is set prior to potential SW encryption */ + skb_get_hash(skb); + /* all mesh/ocb stations are required to support WME */ if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || sdata->vif.type == NL80211_IFTYPE_OCB)) @@ -176,59 +182,6 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, return ieee80211_downgrade_queue(sdata, sta, skb); } - -/* Indicate which queue to use. 
*/ -u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct sta_info *sta = NULL; - const u8 *ra = NULL; - u16 ret; - - /* when using iTXQ, we can do this later */ - if (local->ops->wake_tx_queue) - return 0; - - if (local->hw.queues < IEEE80211_NUM_ACS || skb->len < 6) { - skb->priority = 0; /* required for correct WPA/11i MIC */ - return 0; - } - - rcu_read_lock(); - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP_VLAN: - sta = rcu_dereference(sdata->u.vlan.sta); - if (sta) - break; - fallthrough; - case NL80211_IFTYPE_AP: - ra = skb->data; - break; - case NL80211_IFTYPE_STATION: - /* might be a TDLS station */ - sta = sta_info_get(sdata, skb->data); - if (sta) - break; - - ra = sdata->deflink.u.mgd.bssid; - break; - case NL80211_IFTYPE_ADHOC: - ra = skb->data; - break; - default: - break; - } - - if (!sta && ra && !is_multicast_ether_addr(ra)) - sta = sta_info_get(sdata, ra); - - ret = __ieee80211_select_queue(sdata, sta, skb); - - rcu_read_unlock(); - return ret; -} - /** * ieee80211_set_qos_hdr - Fill in the QoS header if there is one. * diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index 2e3dec0b6087..81f0039527a9 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -13,10 +13,8 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_hdr *hdr); -u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, struct sk_buff *skb); u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb); + struct sta_info *sta, struct sk_buff *skb); void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index 1e4a9f74ed43..dc2d918fac68 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -46,7 +46,7 @@ static int ieee802154_suspend(struct wpan_phy *wpan_phy) if (!local->open_count) goto suspend; - ieee802154_stop_queue(&local->hw); + ieee802154_sync_and_hold_queue(local); synchronize_net(); /* stop hardware - this must stop RX */ @@ -67,12 +67,12 @@ static int ieee802154_resume(struct wpan_phy *wpan_phy) goto wake_up; /* restart hardware */ - ret = drv_start(local); + ret = drv_start(local, local->phy->filtering, &local->addr_filt); if (ret) return ret; wake_up: - ieee802154_wake_queue(&local->hw); + ieee802154_release_queue(local); local->suspended = false; return 0; } diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index d23f0db98015..a7af3f0ddb3e 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -24,203 +24,290 @@ drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb) return local->ops->xmit_sync(&local->hw, skb); } -static inline int drv_start(struct ieee802154_local *local) +static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id) { + struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - trace_802154_drv_start(local); - local->started = true; - smp_mb(); - ret = local->ops->start(&local->hw); + if (!local->ops->set_hw_addr_filt) { + WARN_ON(1); + return -EOPNOTSUPP; + } + + filt.pan_id = pan_id; + + trace_802154_drv_set_pan_id(local, pan_id); + ret = local->ops->set_hw_addr_filt(&local->hw, &filt, + IEEE802154_AFILT_PANID_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } -static inline void drv_stop(struct ieee802154_local *local) +static inline int +drv_set_extended_addr(struct 
ieee802154_local *local, __le64 extended_addr) { - might_sleep(); + struct ieee802154_hw_addr_filt filt; + int ret; - trace_802154_drv_stop(local); - local->ops->stop(&local->hw); - trace_802154_drv_return_void(local); + might_sleep(); - /* sync away all work on the tasklet before clearing started */ - tasklet_disable(&local->tasklet); - tasklet_enable(&local->tasklet); + if (!local->ops->set_hw_addr_filt) { + WARN_ON(1); + return -EOPNOTSUPP; + } - barrier(); + filt.ieee_addr = extended_addr; - local->started = false; + trace_802154_drv_set_extended_addr(local, extended_addr); + ret = local->ops->set_hw_addr_filt(&local->hw, &filt, + IEEE802154_AFILT_IEEEADDR_CHANGED); + trace_802154_drv_return_int(local, ret); + return ret; } static inline int -drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) +drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr) { + struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - trace_802154_drv_set_channel(local, page, channel); - ret = local->ops->set_channel(&local->hw, page, channel); + if (!local->ops->set_hw_addr_filt) { + WARN_ON(1); + return -EOPNOTSUPP; + } + + filt.short_addr = short_addr; + + trace_802154_drv_set_short_addr(local, short_addr); + ret = local->ops->set_hw_addr_filt(&local->hw, &filt, + IEEE802154_AFILT_SADDR_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } -static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm) +static inline int +drv_set_pan_coord(struct ieee802154_local *local, bool is_coord) { + struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_txpower) { + if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } - trace_802154_drv_set_tx_power(local, mbm); - ret = local->ops->set_txpower(&local->hw, mbm); + filt.pan_coord = is_coord; + + trace_802154_drv_set_pan_coord(local, is_coord); + ret = local->ops->set_hw_addr_filt(&local->hw, &filt, + IEEE802154_AFILT_PANC_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } -static inline int drv_set_cca_mode(struct ieee802154_local *local, - const struct wpan_phy_cca *cca) +static inline int +drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) { int ret; might_sleep(); - if (!local->ops->set_cca_mode) { + if (!local->ops->set_promiscuous_mode) { WARN_ON(1); return -EOPNOTSUPP; } - trace_802154_drv_set_cca_mode(local, cca); - ret = local->ops->set_cca_mode(&local->hw, cca); + trace_802154_drv_set_promiscuous_mode(local, on); + ret = local->ops->set_promiscuous_mode(&local->hw, on); trace_802154_drv_return_int(local, ret); return ret; } -static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) +static inline int drv_start(struct ieee802154_local *local, + enum ieee802154_filtering_level level, + const struct ieee802154_hw_addr_filt *addr_filt) { int ret; might_sleep(); - if (!local->ops->set_lbt) { + /* setup receive mode parameters e.g. address mode */ + if (local->hw.flags & IEEE802154_HW_AFILT) { + ret = drv_set_pan_id(local, addr_filt->pan_id); + if (ret < 0) + return ret; + + ret = drv_set_short_addr(local, addr_filt->short_addr); + if (ret < 0) + return ret; + + ret = drv_set_extended_addr(local, addr_filt->ieee_addr); + if (ret < 0) + return ret; + } + + switch (level) { + case IEEE802154_FILTERING_NONE: + fallthrough; + case IEEE802154_FILTERING_1_FCS: + fallthrough; + case IEEE802154_FILTERING_2_PROMISCUOUS: + /* TODO: Requires a different receive mode setup e.g. + * at86rf233 hardware. 
+ */ + fallthrough; + case IEEE802154_FILTERING_3_SCAN: + if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { + ret = drv_set_promiscuous_mode(local, true); + if (ret < 0) + return ret; + } else { + return -EOPNOTSUPP; + } + + /* In practice other filtering levels can be requested, but since + * most hardware/drivers only support IEEE802154_FILTERING_NONE + * for now, we fall back to that filtering level in hardware and + * do our own additional filtering in the mac802154 receive path. + * + * TODO: Move this logic to the device drivers, as hardware may + * support higher-level filters. Hardware may also require a + * different order in which the registers are set, which could + * currently be buggy, so all receive parameters need to be moved + * into the start() callback, letting the driver enter the mode + * before it turns on receive handling. + */ + local->phy->filtering = IEEE802154_FILTERING_NONE; + break; + case IEEE802154_FILTERING_4_FRAME_FIELDS: + /* Do not error out if IEEE802154_HW_PROMISCUOUS because we + * expect the hardware to operate at the level + * IEEE802154_FILTERING_4_FRAME_FIELDS anyway. + */ + if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { + ret = drv_set_promiscuous_mode(local, false); + if (ret < 0) + return ret; + } + + local->phy->filtering = IEEE802154_FILTERING_4_FRAME_FIELDS; + break; + default: WARN_ON(1); - return -EOPNOTSUPP; + return -EINVAL; } - trace_802154_drv_set_lbt_mode(local, mode); - ret = local->ops->set_lbt(&local->hw, mode); + trace_802154_drv_start(local); + local->started = true; + smp_mb(); + ret = local->ops->start(&local->hw); trace_802154_drv_return_int(local, ret); return ret; } +static inline void drv_stop(struct ieee802154_local *local) +{ + might_sleep(); + + trace_802154_drv_stop(local); + local->ops->stop(&local->hw); + trace_802154_drv_return_void(local); + + /* sync away all work on the tasklet before clearing started */ + tasklet_disable(&local->tasklet); + tasklet_enable(&local->tasklet); + + barrier(); + + local->started = false; +} + static inline int -drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm) +drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) { int ret; might_sleep(); - if (!local->ops->set_cca_ed_level) { - WARN_ON(1); - return -EOPNOTSUPP; - } - - trace_802154_drv_set_cca_ed_level(local, mbm); - ret = local->ops->set_cca_ed_level(&local->hw, mbm); + trace_802154_drv_set_channel(local, page, channel); + ret = local->ops->set_channel(&local->hw, page, channel); trace_802154_drv_return_int(local, ret); return ret; } -static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id) +static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm) { - struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_hw_addr_filt) { + if (!local->ops->set_txpower) { WARN_ON(1); return -EOPNOTSUPP; } - filt.pan_id = pan_id; - - trace_802154_drv_set_pan_id(local, pan_id); - ret = local->ops->set_hw_addr_filt(&local->hw, &filt, - IEEE802154_AFILT_PANID_CHANGED); + trace_802154_drv_set_tx_power(local, mbm); + ret = local->ops->set_txpower(&local->hw, mbm); trace_802154_drv_return_int(local, ret); return ret; } -static inline int -drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr) +static inline int drv_set_cca_mode(struct ieee802154_local *local, + const struct wpan_phy_cca *cca) { - struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_hw_addr_filt) { + if (!local->ops->set_cca_mode) { 
WARN_ON(1); return -EOPNOTSUPP; } - filt.ieee_addr = extended_addr; - - trace_802154_drv_set_extended_addr(local, extended_addr); - ret = local->ops->set_hw_addr_filt(&local->hw, &filt, - IEEE802154_AFILT_IEEEADDR_CHANGED); + trace_802154_drv_set_cca_mode(local, cca); + ret = local->ops->set_cca_mode(&local->hw, cca); trace_802154_drv_return_int(local, ret); return ret; } -static inline int -drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr) +static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) { - struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_hw_addr_filt) { + if (!local->ops->set_lbt) { WARN_ON(1); return -EOPNOTSUPP; } - filt.short_addr = short_addr; - - trace_802154_drv_set_short_addr(local, short_addr); - ret = local->ops->set_hw_addr_filt(&local->hw, &filt, - IEEE802154_AFILT_SADDR_CHANGED); + trace_802154_drv_set_lbt_mode(local, mode); + ret = local->ops->set_lbt(&local->hw, mode); trace_802154_drv_return_int(local, ret); return ret; } static inline int -drv_set_pan_coord(struct ieee802154_local *local, bool is_coord) +drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm) { - struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_hw_addr_filt) { + if (!local->ops->set_cca_ed_level) { WARN_ON(1); return -EOPNOTSUPP; } - filt.pan_coord = is_coord; - - trace_802154_drv_set_pan_coord(local, is_coord); - ret = local->ops->set_hw_addr_filt(&local->hw, &filt, - IEEE802154_AFILT_PANC_CHANGED); + trace_802154_drv_set_cca_ed_level(local, mbm); + ret = local->ops->set_cca_ed_level(&local->hw, mbm); trace_802154_drv_return_int(local, ret); return ret; } @@ -264,22 +351,4 @@ drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries) return ret; } -static inline int -drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) -{ - int ret; - - might_sleep(); - - if (!local->ops->set_promiscuous_mode) { - WARN_ON(1); - return -EOPNOTSUPP; - } - - trace_802154_drv_set_promiscuous_mode(local, on); - ret = local->ops->set_promiscuous_mode(&local->hw, on); - trace_802154_drv_return_int(local, ret); - return ret; -} - #endif /* __MAC802154_DRIVER_OPS */ diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 1381e6a5e180..509e0172fe82 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -26,6 +26,8 @@ struct ieee802154_local { struct ieee802154_hw hw; const struct ieee802154_ops *ops; + /* hardware address filter */ + struct ieee802154_hw_addr_filt addr_filt; /* ieee802154 phy */ struct wpan_phy *phy; @@ -55,7 +57,7 @@ struct ieee802154_local { struct sk_buff_head skb_queue; struct sk_buff *tx_skb; - struct work_struct tx_work; + struct work_struct sync_tx_work; /* A negative Linux error code or a null/positive MLME error status */ int tx_result; }; @@ -82,6 +84,16 @@ struct ieee802154_sub_if_data { struct ieee802154_local *local; struct net_device *dev; + /* Each interface starts and works in nominal state at a given filtering + * level given by iface_default_filtering, which is set once and for all + * at interface creation and should not evolve over time. For some MAC + * operations however, the filtering level may change temporarily, as + * reflected in the required_filtering field. The actual filtering at + * the PHY level may be different and is shown in struct wpan_phy.
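The comment above distinguishes three notions of filtering level: the interface's fixed default, the level a given MAC operation requires, and what the PHY actually does. As a compilable illustration of how they interact, here is a small user-space model; the enum values and field names mirror the diff, and the scan scenario is hypothetical:

#include <stdio.h>

/* User-space mirror of enum ieee802154_filtering_level */
enum filtering_level {
	FILTERING_NONE = 0,
	FILTERING_1_FCS,
	FILTERING_2_PROMISCUOUS,
	FILTERING_3_SCAN,
	FILTERING_4_FRAME_FIELDS,
};

struct iface_model {
	enum filtering_level iface_default_filtering; /* fixed at creation */
	enum filtering_level required_filtering;      /* per MAC operation */
};

int main(void)
{
	struct iface_model wpan = {
		.iface_default_filtering = FILTERING_4_FRAME_FIELDS,
		.required_filtering = FILTERING_4_FRAME_FIELDS,
	};
	/* What the PHY actually does may lag behind what is required. */
	enum filtering_level phy_filtering = FILTERING_NONE;

	/* A scan temporarily overrides the nominal level... */
	wpan.required_filtering = FILTERING_3_SCAN;

	/* ...and rx must filter in software while the PHY does less. */
	if (wpan.required_filtering > phy_filtering)
		printf("software filtering needed during scan\n");
	return 0;
}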
+ */ + enum ieee802154_filtering_level iface_default_filtering; + enum ieee802154_filtering_level required_filtering; + unsigned long state; char name[IFNAMSIZ]; @@ -123,13 +135,53 @@ ieee802154_sdata_running(struct ieee802154_sub_if_data *sdata) extern struct ieee802154_mlme_ops mac802154_mlme_wpan; void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb); -void ieee802154_xmit_worker(struct work_struct *work); +void ieee802154_xmit_sync_worker(struct work_struct *work); +int ieee802154_sync_and_hold_queue(struct ieee802154_local *local); +int ieee802154_mlme_op_pre(struct ieee802154_local *local); +int ieee802154_mlme_tx(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); +void ieee802154_mlme_op_post(struct ieee802154_local *local); +int ieee802154_mlme_tx_one(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); netdev_tx_t ieee802154_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer); +/** + * ieee802154_hold_queue - hold ieee802154 queue + * @local: main mac object + * + * Hold a queue by incrementing an atomic counter and requesting the netif + * queues to be stopped. The queues cannot be woken up until the counter has + * been brought back to zero with as many ieee802154_release_queue() calls as + * needed. + */ +void ieee802154_hold_queue(struct ieee802154_local *local); + +/** + * ieee802154_release_queue - release ieee802154 queue + * @local: main mac object + * + * Release a held queue by decrementing an atomic counter, waking it up only + * when the counter reaches 0. + */ +void ieee802154_release_queue(struct ieee802154_local *local); + +/** + * ieee802154_disable_queue - disable ieee802154 queue + * @local: main mac object + * + * When trying to sync the Tx queue, we cannot just stop the queue + * (which is basically a bit being set without proper lock handling) + * because it would be racy. We actually need to call netif_tx_disable() + * instead, which is done by this helper. Restarting the queue can + * however still be done with a regular wake call.
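In other words, hold/release form a nesting stop counter: only the 0-to-1 transition stops the netif queues and only the final release wakes them. A minimal user-space sketch of that contract, using C11 atomics where the kernel pairs an atomic counter with a spinlock:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int hold_txs;

static void stop_queues(void) { puts("netif queues stopped"); }
static void wake_queues(void) { puts("netif queues woken"); }

static void hold_queue(void)
{
	/* Only the first holder actually stops the queues. */
	if (atomic_fetch_add(&hold_txs, 1) == 0)
		stop_queues();
}

static void release_queue(void)
{
	/* Only the last release wakes the queues up again. */
	if (atomic_fetch_sub(&hold_txs, 1) == 1)
		wake_queues();
}

int main(void)
{
	hold_queue();    /* stops */
	hold_queue();    /* nested, no-op */
	release_queue(); /* still held */
	release_queue(); /* wakes */
	return 0;
}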
+ */ +void ieee802154_disable_queue(struct ieee802154_local *local); + /* MIB callbacks */ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 500ed1b81250..d9b50884d34e 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -147,25 +147,12 @@ static int ieee802154_setup_hw(struct ieee802154_sub_if_data *sdata) struct wpan_dev *wpan_dev = &sdata->wpan_dev; int ret; - if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { - ret = drv_set_promiscuous_mode(local, - wpan_dev->promiscuous_mode); - if (ret < 0) - return ret; - } + sdata->required_filtering = sdata->iface_default_filtering; if (local->hw.flags & IEEE802154_HW_AFILT) { - ret = drv_set_pan_id(local, wpan_dev->pan_id); - if (ret < 0) - return ret; - - ret = drv_set_extended_addr(local, wpan_dev->extended_addr); - if (ret < 0) - return ret; - - ret = drv_set_short_addr(local, wpan_dev->short_addr); - if (ret < 0) - return ret; + local->addr_filt.pan_id = wpan_dev->pan_id; + local->addr_filt.ieee_addr = wpan_dev->extended_addr; + local->addr_filt.short_addr = wpan_dev->short_addr; } if (local->hw.flags & IEEE802154_HW_LBT) { @@ -206,7 +193,8 @@ static int mac802154_slave_open(struct net_device *dev) if (res) goto err; - res = drv_start(local); + res = drv_start(local, sdata->required_filtering, + &local->addr_filt); if (res) goto err; } @@ -223,15 +211,16 @@ err: static int ieee802154_check_mac_settings(struct ieee802154_local *local, - struct wpan_dev *wpan_dev, - struct wpan_dev *nwpan_dev) + struct ieee802154_sub_if_data *sdata, + struct ieee802154_sub_if_data *nsdata) { + struct wpan_dev *nwpan_dev = &nsdata->wpan_dev; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + ASSERT_RTNL(); - if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { - if (wpan_dev->promiscuous_mode != nwpan_dev->promiscuous_mode) - return -EBUSY; - } + if (sdata->iface_default_filtering != nsdata->iface_default_filtering) + return -EBUSY; if (local->hw.flags & IEEE802154_HW_AFILT) { if (wpan_dev->pan_id != nwpan_dev->pan_id || @@ -285,8 +274,7 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, /* check all phy mac sublayer settings are the same. * We have only one phy, different values make trouble.
*/ - ret = ieee802154_check_mac_settings(local, wpan_dev, - &nsdata->wpan_dev); + ret = ieee802154_check_mac_settings(local, sdata, nsdata); if (ret < 0) return ret; } @@ -586,7 +574,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, sdata->dev->priv_destructor = mac802154_wpan_free; sdata->dev->netdev_ops = &mac802154_wpan_ops; sdata->dev->ml_priv = &mac802154_mlme_wpan; - wpan_dev->promiscuous_mode = false; + sdata->iface_default_filtering = IEEE802154_FILTERING_4_FRAME_FIELDS; wpan_dev->header_ops = &ieee802154_header_ops; mutex_init(&sdata->sec_mtx); @@ -600,7 +588,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, case NL802154_IFTYPE_MONITOR: sdata->dev->needs_free_netdev = true; sdata->dev->netdev_ops = &mac802154_monitor_ops; - wpan_dev->promiscuous_mode = true; + sdata->iface_default_filtering = IEEE802154_FILTERING_NONE; break; default: BUG(); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index bd7bdb1219dd..40fab08df24b 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -95,7 +95,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) skb_queue_head_init(&local->skb_queue); - INIT_WORK(&local->tx_work, ieee802154_xmit_worker); + INIT_WORK(&local->sync_tx_work, ieee802154_xmit_sync_worker); /* init supported flags with 802.15.4 default ranges */ phy->supported.max_minbe = 8; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index c439125ef2b9..0724aac8f48c 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -34,6 +34,7 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, struct sk_buff *skb, const struct ieee802154_hdr *hdr) { struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct wpan_phy *wpan_phy = sdata->local->hw.phy; __le16 span, sshort; int rc; @@ -42,6 +43,17 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, span = wpan_dev->pan_id; sshort = wpan_dev->short_addr; + /* Level 3 filtering: Only beacons are accepted during scans */ + if (sdata->required_filtering == IEEE802154_FILTERING_3_SCAN && + sdata->required_filtering > wpan_phy->filtering) { + if (mac_cb(skb)->type != IEEE802154_FC_TYPE_BEACON) { + dev_dbg(&sdata->dev->dev, + "drop non-beacon frame (0x%x) during scan\n", + mac_cb(skb)->type); + goto fail; + } + } + switch (mac_cb(skb)->dest.mode) { case IEEE802154_ADDR_NONE: if (hdr->source.mode != IEEE802154_ADDR_NONE) @@ -114,8 +126,10 @@ fail: static void ieee802154_print_addr(const char *name, const struct ieee802154_addr *addr) { - if (addr->mode == IEEE802154_ADDR_NONE) + if (addr->mode == IEEE802154_ADDR_NONE) { pr_debug("%s not present\n", name); + return; + } pr_debug("%s PAN ID: %04x\n", name, le16_to_cpu(addr->pan_id)); if (addr->mode == IEEE802154_ADDR_SHORT) { @@ -132,7 +146,7 @@ static int ieee802154_parse_frame_start(struct sk_buff *skb, struct ieee802154_hdr *hdr) { int hlen; - struct ieee802154_mac_cb *cb = mac_cb_init(skb); + struct ieee802154_mac_cb *cb = mac_cb(skb); skb_reset_mac_header(skb); @@ -209,6 +223,13 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, if (!ieee802154_sdata_running(sdata)) continue; + /* Do not deliver packets received on interfaces expecting + * AACK=1 if the address filters were disabled.
+ */ + if (local->hw.phy->filtering < IEEE802154_FILTERING_4_FRAME_FIELDS && + sdata->required_filtering == IEEE802154_FILTERING_4_FRAME_FIELDS) + continue; + ieee802154_subif_frame(sdata, skb, &hdr); skb = NULL; break; @@ -268,10 +289,8 @@ void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb) ieee802154_monitors_rx(local, skb); - /* Check if transceiver doesn't validate the checksum. - * If not we validate the checksum here. - */ - if (local->hw.flags & IEEE802154_HW_RX_DROP_BAD_CKSUM) { + /* Level 1 filtering: Check the FCS by software when relevant */ + if (local->hw.phy->filtering == IEEE802154_FILTERING_NONE) { crc = crc_ccitt(0, skb->data, skb->len); if (crc) { rcu_read_unlock(); @@ -294,8 +313,9 @@ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi) { struct ieee802154_local *local = hw_to_local(hw); + struct ieee802154_mac_cb *cb = mac_cb_init(skb); - mac_cb(skb)->lqi = lqi; + cb->lqi = lqi; skb->pkt_type = IEEE802154_RX_MSG; skb_queue_tail(&local->skb_queue, skb); tasklet_schedule(&local->tasklet); diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index c829e4a75325..9d8d43cf1e64 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -22,10 +22,10 @@ #include "ieee802154_i.h" #include "driver-ops.h" -void ieee802154_xmit_worker(struct work_struct *work) +void ieee802154_xmit_sync_worker(struct work_struct *work) { struct ieee802154_local *local = - container_of(work, struct ieee802154_local, tx_work); + container_of(work, struct ieee802154_local, sync_tx_work); struct sk_buff *skb = local->tx_skb; struct net_device *dev = skb->dev; int res; @@ -43,7 +43,9 @@ void ieee802154_xmit_worker(struct work_struct *work) err_tx: /* Restart the netif queue on each sub_if_data object. */ - ieee802154_wake_queue(&local->hw); + ieee802154_release_queue(local); + if (atomic_dec_and_test(&local->phy->ongoing_txs)) + wake_up(&local->phy->sync_txq); kfree_skb(skb); netdev_dbg(dev, "transmission failed\n"); } @@ -65,7 +67,7 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) consume_skb(skb); skb = nskb; } else { - goto err_tx; + goto err_free_skb; } } @@ -74,32 +76,134 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) } /* Stop the netif queue on each sub_if_data object. */ - ieee802154_stop_queue(&local->hw); + ieee802154_hold_queue(local); + atomic_inc(&local->phy->ongoing_txs); - /* async is priority, otherwise sync is fallback */ + /* Drivers should preferably implement the async callback. In some rare + * cases they only provide a sync callback which we will use as a + * fallback. 
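A stripped-down model of that dispatch decision may help; function pointers stand in for struct ieee802154_ops, and the sync worker hand-off is shown as a direct call (in the kernel it is a queue_work() on sync_tx_work):

#include <stdio.h>

struct sk_buff; /* opaque here */

struct ops_model {
	int (*xmit_async)(struct sk_buff *skb); /* preferred */
	int (*xmit_sync)(struct sk_buff *skb);  /* rare fallback */
};

static int sync_tx(struct sk_buff *skb)
{
	(void)skb;
	puts("sync tx via worker");
	return 0;
}

static int tx_dispatch(const struct ops_model *ops, struct sk_buff *skb)
{
	if (ops->xmit_async)
		return ops->xmit_async(skb); /* completion comes later */

	/* No async callback: defer to the sync worker (a direct call here,
	 * queue_work(local->workqueue, &local->sync_tx_work) in the kernel). */
	return ops->xmit_sync(skb);
}

int main(void)
{
	const struct ops_model sync_only = { .xmit_sync = sync_tx };

	return tx_dispatch(&sync_only, NULL);
}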
+ */ if (local->ops->xmit_async) { unsigned int len = skb->len; ret = drv_xmit_async(local, skb); - if (ret) { - ieee802154_wake_queue(&local->hw); - goto err_tx; - } + if (ret) + goto err_wake_netif_queue; dev->stats.tx_packets++; dev->stats.tx_bytes += len; } else { local->tx_skb = skb; - queue_work(local->workqueue, &local->tx_work); + queue_work(local->workqueue, &local->sync_tx_work); } return NETDEV_TX_OK; -err_tx: +err_wake_netif_queue: + ieee802154_release_queue(local); + if (atomic_dec_and_test(&local->phy->ongoing_txs)) + wake_up(&local->phy->sync_txq); +err_free_skb: kfree_skb(skb); return NETDEV_TX_OK; } +static int ieee802154_sync_queue(struct ieee802154_local *local) +{ + int ret; + + ieee802154_hold_queue(local); + ieee802154_disable_queue(local); + wait_event(local->phy->sync_txq, !atomic_read(&local->phy->ongoing_txs)); + ret = local->tx_result; + ieee802154_release_queue(local); + + return ret; +} + +int ieee802154_sync_and_hold_queue(struct ieee802154_local *local) +{ + int ret; + + ieee802154_hold_queue(local); + ret = ieee802154_sync_queue(local); + set_bit(WPAN_PHY_FLAG_STATE_QUEUE_STOPPED, &local->phy->flags); + + return ret; +} + +int ieee802154_mlme_op_pre(struct ieee802154_local *local) +{ + return ieee802154_sync_and_hold_queue(local); +} + +int ieee802154_mlme_tx(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + int ret; + + /* Avoid possible calls to ->ndo_stop() when we asynchronously perform + * MLME transmissions. + */ + rtnl_lock(); + + /* Ensure the device was not stopped, otherwise error out */ + if (!local->open_count) { + rtnl_unlock(); + return -ENETDOWN; + } + + /* Warn if the ieee802154 core thinks MLME frames can be sent while the + * net interface expects this cannot happen. + */ + if (WARN_ON_ONCE(!netif_running(sdata->dev))) { + rtnl_unlock(); + return -ENETDOWN; + } + + ieee802154_tx(local, skb); + ret = ieee802154_sync_queue(local); + + rtnl_unlock(); + + return ret; +} + +void ieee802154_mlme_op_post(struct ieee802154_local *local) +{ + ieee802154_release_queue(local); +} + +int ieee802154_mlme_tx_one(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + int ret; + + ieee802154_mlme_op_pre(local); + ret = ieee802154_mlme_tx(local, sdata, skb); + ieee802154_mlme_op_post(local); + + return ret; +} + +static bool ieee802154_queue_is_stopped(struct ieee802154_local *local) +{ + return test_bit(WPAN_PHY_FLAG_STATE_QUEUE_STOPPED, &local->phy->flags); +} + +static netdev_tx_t +ieee802154_hot_tx(struct ieee802154_local *local, struct sk_buff *skb) +{ + /* Warn if the net interface tries to transmit frames while the + * ieee802154 core assumes the queue is stopped. 
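ieee802154_sync_queue() boils down to "block until no transmission is in flight". The wait_event()/wake_up() pairing on phy->ongoing_txs and phy->sync_txq can be modeled in user space with a mutex and condition variable:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t sync_txq = PTHREAD_COND_INITIALIZER;
static int ongoing_txs;

static void tx_start(void) /* atomic_inc(&phy->ongoing_txs) */
{
	pthread_mutex_lock(&lock);
	ongoing_txs++;
	pthread_mutex_unlock(&lock);
}

static void tx_done(void) /* driver completion path */
{
	pthread_mutex_lock(&lock);
	if (--ongoing_txs == 0)
		pthread_cond_broadcast(&sync_txq); /* wake_up(&phy->sync_txq) */
	pthread_mutex_unlock(&lock);
}

static void sync_queue(void) /* wait_event(sync_txq, !ongoing_txs) */
{
	pthread_mutex_lock(&lock);
	while (ongoing_txs)
		pthread_cond_wait(&sync_txq, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	tx_start();
	tx_done();    /* last completion wakes any waiter */
	sync_queue(); /* returns at once: nothing in flight */
	return 0;
}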
+ */ + WARN_ON_ONCE(ieee802154_queue_is_stopped(local)); + + return ieee802154_tx(local, skb); +} + netdev_tx_t ieee802154_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -107,7 +211,7 @@ ieee802154_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->skb_iif = dev->ifindex; - return ieee802154_tx(sdata->local, skb); + return ieee802154_hot_tx(sdata->local, skb); } netdev_tx_t @@ -129,5 +233,5 @@ ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->skb_iif = dev->ifindex; - return ieee802154_tx(sdata->local, skb); + return ieee802154_hot_tx(sdata->local, skb); } diff --git a/net/mac802154/util.c b/net/mac802154/util.c index 9f024d85563b..ebc9a8521765 100644 --- a/net/mac802154/util.c +++ b/net/mac802154/util.c @@ -13,12 +13,23 @@ /* privid for wpan_phys to determine whether they belong to us or not */ const void *const mac802154_wpan_phy_privid = &mac802154_wpan_phy_privid; -void ieee802154_wake_queue(struct ieee802154_hw *hw) +/** + * ieee802154_wake_queue - wake ieee802154 queue + * @hw: main hardware object + * + * Transceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we have to stop the queue to + * keep new skbs from coming in during the transmission. The queue then needs + * to be woken up after the operation. + */ +static void ieee802154_wake_queue(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); struct ieee802154_sub_if_data *sdata; rcu_read_lock(); + clear_bit(WPAN_PHY_FLAG_STATE_QUEUE_STOPPED, &local->phy->flags); list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!sdata->dev) continue; @@ -27,9 +38,18 @@ void ieee802154_wake_queue(struct ieee802154_hw *hw) } rcu_read_unlock(); } -EXPORT_SYMBOL(ieee802154_wake_queue); -void ieee802154_stop_queue(struct ieee802154_hw *hw) +/** + * ieee802154_stop_queue - stop ieee802154 queue + * @hw: main hardware object + * + * Transceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we need to tell upper layers to + * stop giving us new skbs while we are busy with the transmitted one. The queue + * must then be stopped before transmitting.
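The new WPAN_PHY_FLAG_STATE_QUEUE_STOPPED bit ties these helpers together: ieee802154_sync_and_hold_queue() sets it, ieee802154_wake_queue() clears it, and the hot path only warns while it is still set. A compilable user-space model of that flag protocol, with C11 atomics in place of set_bit()/clear_bit()/test_bit():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define QUEUE_STOPPED_BIT 1u

static atomic_uint phy_flags;

static void stop_queue_flagged(void) /* sync_and_hold path */
{
	atomic_fetch_or(&phy_flags, QUEUE_STOPPED_BIT);
	puts("queue held for a sync operation");
}

static void wake_queue(void) /* mirrors the clear_bit() in wake_queue */
{
	atomic_fetch_and(&phy_flags, ~QUEUE_STOPPED_BIT);
	puts("queue woken");
}

static bool queue_is_stopped(void) /* checked by the hot tx path */
{
	return atomic_load(&phy_flags) & QUEUE_STOPPED_BIT;
}

int main(void)
{
	stop_queue_flagged();
	printf("hot tx would warn: %d\n", queue_is_stopped());
	wake_queue();
	printf("hot tx would warn: %d\n", queue_is_stopped());
	return 0;
}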
+ */ +static void ieee802154_stop_queue(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); struct ieee802154_sub_if_data *sdata; @@ -43,14 +63,47 @@ void ieee802154_stop_queue(struct ieee802154_hw *hw) } rcu_read_unlock(); } -EXPORT_SYMBOL(ieee802154_stop_queue); + +void ieee802154_hold_queue(struct ieee802154_local *local) +{ + unsigned long flags; + + spin_lock_irqsave(&local->phy->queue_lock, flags); + if (!atomic_fetch_inc(&local->phy->hold_txs)) + ieee802154_stop_queue(&local->hw); + spin_unlock_irqrestore(&local->phy->queue_lock, flags); +} + +void ieee802154_release_queue(struct ieee802154_local *local) +{ + unsigned long flags; + + spin_lock_irqsave(&local->phy->queue_lock, flags); + if (atomic_dec_and_test(&local->phy->hold_txs)) + ieee802154_wake_queue(&local->hw); + spin_unlock_irqrestore(&local->phy->queue_lock, flags); +} + +void ieee802154_disable_queue(struct ieee802154_local *local) +{ + struct ieee802154_sub_if_data *sdata; + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!sdata->dev) + continue; + + netif_tx_disable(sdata->dev); + } + rcu_read_unlock(); +} enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer) { struct ieee802154_local *local = container_of(timer, struct ieee802154_local, ifs_timer); - ieee802154_wake_queue(&local->hw); + ieee802154_release_queue(local); return HRTIMER_NORESTART; } @@ -84,10 +137,12 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, hw->phy->sifs_period * NSEC_PER_USEC, HRTIMER_MODE_REL); } else { - ieee802154_wake_queue(hw); + ieee802154_release_queue(local); } dev_consume_skb_any(skb); + if (atomic_dec_and_test(&hw->phy->ongoing_txs)) + wake_up(&hw->phy->sync_txq); } EXPORT_SYMBOL(ieee802154_xmit_complete); @@ -97,8 +152,10 @@ void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb, struct ieee802154_local *local = hw_to_local(hw); local->tx_result = reason; - ieee802154_wake_queue(hw); + ieee802154_release_queue(local); dev_kfree_skb_any(skb); + if (atomic_dec_and_test(&hw->phy->ongoing_txs)) + wake_up(&hw->phy->sync_txq); } EXPORT_SYMBOL(ieee802154_xmit_error); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index b52afe316dc4..35b5f806fdda 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_dev *mdev, p = per_cpu_ptr(mdev->stats, i); do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); local = p->stats; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); stats->rx_packets += local.rx_packets; stats->rx_bytes += local.rx_bytes; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f599ad44ed24..109eea2c65ff 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk) set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); } +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, + size_t len, int *copied_syn) +{ + unsigned int saved_flags = msg->msg_flags; + struct mptcp_sock *msk = mptcp_sk(sk); + int ret; + + lock_sock(ssk); + msg->msg_flags |= MSG_DONTWAIT; + msk->connect_flags = O_NONBLOCK; + msk->is_sendmsg = 1; + ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); + msk->is_sendmsg = 0; + msg->msg_flags = saved_flags; + release_sock(ssk); + + /* do the blocking bits of inet_stream_connect outside the ssk socket lock */ + if 
(ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) { + ret = __inet_stream_connect(sk->sk_socket, msg->msg_name, + msg->msg_namelen, msg->msg_flags, 1); + + /* Keep the same behaviour of plain TCP: zero the copied bytes in + * case of any error, except timeout or signal + */ + if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) + *copied_syn = 0; + } + + return ret; +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1693,23 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ssock = __mptcp_nmpc_socket(msk); if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { - struct sock *ssk = ssock->sk; int copied_syn = 0; - lock_sock(ssk); - - ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL); + ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); copied += copied_syn; - if (ret == -EINPROGRESS && copied_syn > 0) { - /* reflect the new state on the MPTCP socket */ - inet_sk_state_store(sk, inet_sk_state_load(ssk)); - release_sock(ssk); + if (ret == -EINPROGRESS && copied_syn > 0) goto out; - } else if (ret) { - release_sock(ssk); + else if (ret) goto do_error; - } - release_sock(ssk); } timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -2708,6 +2730,8 @@ static int mptcp_init_sock(struct sock *sk) if (ret) return ret; + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); + /* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will * propagate the correct value */ @@ -2952,7 +2976,7 @@ static void mptcp_close(struct sock *sk, long timeout) sock_put(sk); } -static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) +void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) const struct ipv6_pinfo *ssk6 = inet6_sk(ssk); @@ -3507,10 +3531,73 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(answ, (int __user *)arg); } +static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + subflow->request_mptcp = 0; + __mptcp_do_fallback(msk); +} + +static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + struct socket *ssock; + int err = -EINVAL; + + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) + return -EINVAL; + + mptcp_token_destroy(msk); + inet_sk_state_store(sk, TCP_SYN_SENT); + subflow = mptcp_subflow_ctx(ssock->sk); +#ifdef CONFIG_TCP_MD5SIG + /* no MPTCP if MD5SIG is enabled on this socket or we may run out of + * TCP option space. + */ + if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) + mptcp_subflow_early_fallback(msk, subflow); +#endif + if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) { + MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); + mptcp_subflow_early_fallback(msk, subflow); + } + if (likely(!__mptcp_check_fallback(msk))) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); + + /* if reaching here via the fastopen/sendmsg path, the caller already + * acquired the subflow socket lock, too. 
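On the userspace side, the path above is reached through a deferred connect. A minimal client sketch, assuming a kernel with this series and MPTCP enabled; IPPROTO_MPTCP and TCP_FASTOPEN_CONNECT are guarded in case the libc headers lack them, and error handling is trimmed:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
#ifndef TCP_FASTOPEN_CONNECT
#define TCP_FASTOPEN_CONNECT 30
#endif

int main(void)
{
	struct sockaddr_in dst = { .sin_family = AF_INET, .sin_port = htons(8080) };
	const char req[] = "hello";
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);

	/* Arm deferred connect on the first subflow... */
	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));

	/* ...connect() then returns without emitting a SYN... */
	connect(fd, (struct sockaddr *)&dst, sizeof(dst));

	/* ...and the first write sends the SYN, carrying this data. */
	write(fd, req, sizeof(req) - 1);
	close(fd);
	return 0;
}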
+ */ + if (msk->is_sendmsg) + err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1); + else + err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags); + inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect; + + /* on successful connect, the msk state will be moved to established by + * subflow_finish_connect() + */ + if (unlikely(err && err != -EINPROGRESS)) { + inet_sk_state_store(sk, inet_sk_state_load(ssock->sk)); + return err; + } + + mptcp_copy_inaddrs(sk, ssock->sk); + + /* unblocking connect, mptcp-level inet_stream_connect will error out + * without changing the socket state, update it here. + */ + if (err == -EINPROGRESS) + sk->sk_socket->state = ssock->state; + return err; +} + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, .init = mptcp_init_sock, + .connect = mptcp_connect, .disconnect = mptcp_disconnect, .close = mptcp_close, .accept = mptcp_accept, @@ -3562,78 +3649,16 @@ unlock: return err; } -static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow) -{ - subflow->request_mptcp = 0; - __mptcp_do_fallback(msk); -} - static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { - struct mptcp_sock *msk = mptcp_sk(sock->sk); - struct mptcp_subflow_context *subflow; - struct socket *ssock; - int err = -EINVAL; + int ret; lock_sock(sock->sk); - if (uaddr) { - if (addr_len < sizeof(uaddr->sa_family)) - goto unlock; - - if (uaddr->sa_family == AF_UNSPEC) { - err = mptcp_disconnect(sock->sk, flags); - sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; - goto unlock; - } - } - - if (sock->state != SS_UNCONNECTED && msk->subflow) { - /* pending connection or invalid state, let existing subflow - * cope with that - */ - ssock = msk->subflow; - goto do_connect; - } - - ssock = __mptcp_nmpc_socket(msk); - if (!ssock) - goto unlock; - - mptcp_token_destroy(msk); - inet_sk_state_store(sock->sk, TCP_SYN_SENT); - subflow = mptcp_subflow_ctx(ssock->sk); -#ifdef CONFIG_TCP_MD5SIG - /* no MPTCP if MD5SIG is enabled on this socket or we may run out of - * TCP option space. 
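The -EINPROGRESS handling above gives MPTCP the usual non-blocking connect semantics. A user-space sketch of how a client consumes them, assuming MPTCP support and with error handling trimmed:

#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <poll.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif

int main(void)
{
	struct sockaddr_in dst = { .sin_family = AF_INET, .sin_port = htons(8080) };
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	int err = 0;
	socklen_t len = sizeof(err);
	struct pollfd pfd;

	fcntl(fd, F_SETFL, O_NONBLOCK);
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);

	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0 &&
	    errno == EINPROGRESS) {
		pfd.fd = fd;
		pfd.events = POLLOUT; /* writable once the handshake settles */
		poll(&pfd, 1, 5000);
		getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
	}
	close(fd);
	return err != 0;
}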
- */ - if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) - mptcp_subflow_early_fallback(msk, subflow); -#endif - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) { - MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); - mptcp_subflow_early_fallback(msk, subflow); - } - if (likely(!__mptcp_check_fallback(msk))) - MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE); - -do_connect: - err = ssock->ops->connect(ssock, uaddr, addr_len, flags); - inet_sk(sock->sk)->defer_connect = inet_sk(ssock->sk)->defer_connect; - sock->state = ssock->state; - - /* on successful connect, the msk state will be moved to established by - * subflow_finish_connect() - */ - if (!err || err == -EINPROGRESS) - mptcp_copy_inaddrs(sock->sk, ssock->sk); - else - inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); - -unlock: + mptcp_sk(sock->sk)->connect_flags = flags; + ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0); release_sock(sock->sk); - return err; + return ret; } static int mptcp_listen(struct socket *sock, int backlog) @@ -3684,6 +3709,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct mptcp_subflow_context *subflow; struct sock *newsk = newsock->sk; + set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + lock_sock(newsk); /* PM/worker can now acquire the first subflow socket @@ -3699,7 +3726,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (mptcp_is_fully_established(newsk)) mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); - mptcp_copy_inaddrs(newsk, msk->first); mptcp_rcv_space_init(msk, msk->first); mptcp_propagate_sndbuf(newsk, msk->first); @@ -3898,12 +3924,6 @@ static const struct proto_ops mptcp_v6_stream_ops = { static struct proto mptcp_v6_prot; -static void mptcp_v6_destroy(struct sock *sk) -{ - mptcp_destroy(sk); - inet6_destroy_sock(sk); -} - static struct inet_protosw mptcp_v6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_MPTCP, @@ -3919,7 +3939,6 @@ int __init mptcp_proto_v6_init(void) mptcp_v6_prot = mptcp_prot; strcpy(mptcp_v6_prot.name, "MPTCPv6"); mptcp_v6_prot.slab = NULL; - mptcp_v6_prot.destroy = mptcp_v6_destroy; mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock); err = proto_register(&mptcp_v6_prot, 1); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c0b5b4628f65..6a09ab99a12d 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -285,7 +285,9 @@ struct mptcp_sock { u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, - nodelay:1; + nodelay:1, + is_sendmsg:1; + int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -599,6 +601,7 @@ int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); int mptcp_get_pm_type(const struct net *net); +void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index c7cb68c725b2..f85e9bbfe86f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -560,6 +560,7 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_TX_DELAY: case TCP_INQ: case TCP_FASTOPEN_CONNECT: + case TCP_FASTOPEN_NO_COOKIE: return true; } @@ -568,8 +569,8 @@ static bool mptcp_supported_sockopt(int level, 
int optname) /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, * TCP_REPAIR_WINDOW are not supported, better avoid this mess */ - /* TCP_FASTOPEN_KEY, TCP_FASTOPEN, TCP_FASTOPEN_NO_COOKIE, - * are not supported fastopen is currently unsupported + /* TCP_FASTOPEN_KEY, TCP_FASTOPEN are not supported because + * fastopen for the listener side is currently unsupported */ } return false; @@ -757,29 +758,17 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, return -EOPNOTSUPP; } -static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) -{ - struct socket *listener; - - listener = __mptcp_nmpc_socket(msk); - if (!listener) - return 0; /* TCP_DEFER_ACCEPT does not fail */ - - return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen); -} - -static int mptcp_setsockopt_sol_tcp_fastopen_connect(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) +static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct socket *sock; - /* Limit to first subflow */ + /* Limit to first subflow, before the connection establishment */ sock = __mptcp_nmpc_socket(msk); if (!sock) return -EINVAL; - return tcp_setsockopt(sock->sk, SOL_TCP, TCP_FASTOPEN_CONNECT, optval, optlen); + return tcp_setsockopt(sock->sk, level, optname, optval, optlen); } static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, @@ -809,9 +798,13 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_NODELAY: return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: - return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); + /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ + mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); + return 0; case TCP_FASTOPEN_CONNECT: - return mptcp_setsockopt_sol_tcp_fastopen_connect(msk, optval, optlen); + case TCP_FASTOPEN_NO_COOKIE: + return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, + optval, optlen); } return -EOPNOTSUPP; @@ -1174,6 +1167,7 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_CC_INFO: case TCP_DEFER_ACCEPT: case TCP_FASTOPEN_CONNECT: + case TCP_FASTOPEN_NO_COOKIE: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); case TCP_INQ: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 07dd23d0fe04..437a283ba6ea 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -723,6 +723,8 @@ create_child: goto dispose_child; } + if (new_msk) + mptcp_copy_inaddrs(new_msk, child); subflow_drop_ctx(child); goto out; } @@ -750,6 +752,11 @@ create_child: ctx->conn = new_msk; new_msk = NULL; + /* set msk addresses early to ensure mptcp_pm_get_local_id() + * uses the correct data + */ + mptcp_copy_inaddrs(ctx->conn, child); + /* with OoO packets we can reach here without ingress * mpc option */ @@ -1595,7 +1602,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) /* kernel sockets do not by default acquire net ref, but TCP timer * needs it. + * Update ns_tracker to current stack trace and refcounted tracker. 
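For the sockopt side of this hunk: TCP_FASTOPEN_NO_COOKIE now rides the same first-subflow passthrough as TCP_FASTOPEN_CONNECT, applied before the connection is established, so a client can set and read it on the MPTCP socket before connecting. A short sketch, with the constant guarded in case the headers predate it:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
#ifndef TCP_FASTOPEN_NO_COOKIE
#define TCP_FASTOPEN_NO_COOKIE 34
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	int one = 1, out = 0;
	socklen_t len = sizeof(out);

	/* Must happen before connect(): only the first subflow gets it. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_NO_COOKIE, &one, sizeof(one)))
		perror("setsockopt");

	/* Readable back through the matching getsockopt() passthrough. */
	getsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_NO_COOKIE, &out, &len);
	printf("TCP_FASTOPEN_NO_COOKIE = %d\n", out);
	close(fd);
	return 0;
}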
*/ + __netns_tracker_free(net, &sf->sk->ns_tracker, false); sf->sk->sk_net_refcnt = 1; get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4b8d04640ff3..0846bd75b1da 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -568,12 +568,6 @@ config NFT_TUNNEL This option adds the "tunnel" expression that you can use to set tunneling policies. -config NFT_OBJREF - tristate "Netfilter nf_tables stateful object reference module" - help - This option adds the "objref" expression that allows you to refer to - stateful objects, such as counters and quotas. - config NFT_QUEUE depends on NETFILTER_NETLINK_QUEUE tristate "Netfilter nf_tables queue module" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0f060d100880..1d4db1943936 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -86,7 +86,8 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \ - nft_counter.o nft_chain_route.o nf_tables_offload.o \ + nft_counter.o nft_objref.o nft_inner.o \ + nft_chain_route.o nf_tables_offload.o \ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \ nft_set_pipapo.o @@ -104,7 +105,6 @@ obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o -obj-$(CONFIG_NFT_OBJREF) += nft_objref.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o obj-$(CONFIG_NFT_QUOTA) += nft_quota.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index fb67f1ca2495..8c04bb57dd6f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1308,7 +1308,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) * Randomly scan 1/32 of the whole table every second */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { - unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask; + unsigned int hash = get_random_u32() & ip_vs_conn_tab_mask; hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->ipvs != ipvs) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 988222fff9f0..4d62059a6021 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2296,13 +2296,13 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) u64 conns, inpkts, outpkts, inbytes, outbytes; do { - start = u64_stats_fetch_begin_irq(&u->syncp); + start = u64_stats_fetch_begin(&u->syncp); conns = u->cnt.conns; inpkts = u->cnt.inpkts; outpkts = u->cnt.outpkts; inbytes = u->cnt.inbytes; outbytes = u->cnt.outbytes; - } while (u64_stats_fetch_retry_irq(&u->syncp, start)); + } while (u64_stats_fetch_retry(&u->syncp, start)); seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", i, (u64)conns, (u64)inpkts, diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c index acb55d8393ef..f2579fc9c75b 100644 --- a/net/netfilter/ipvs/ip_vs_twos.c +++ b/net/netfilter/ipvs/ip_vs_twos.c @@ -71,8 +71,8 @@ static struct ip_vs_dest *ip_vs_twos_schedule(struct ip_vs_service *svc, * from 0 to total_weight */ total_weight += 1; - rweight1 = prandom_u32() % total_weight; - rweight2 = prandom_u32() % total_weight; + rweight1 = prandom_u32_max(total_weight); + rweight2 = 
prandom_u32_max(total_weight); /* Pick two weighted servers */ list_for_each_entry_rcu(dest, &svc->destinations, n_list) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index d8e6380f6337..18319a6e6806 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -468,7 +468,7 @@ find_free_id: if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) off = (ntohs(*keyptr) - ntohs(range->base_proto.all)); else - off = prandom_u32(); + off = get_random_u16(); attempts = range_size; if (attempts > max_attempts) @@ -490,7 +490,7 @@ another_round: if (attempts >= range_size || attempts < 16) return; attempts /= 2; - off = prandom_u32(); + off = get_random_u16(); goto another_round; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a0653a8dfa82..62da204eed41 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1534,10 +1534,10 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) for_each_possible_cpu(cpu) { cpu_stats = per_cpu_ptr(stats, cpu); do { - seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + seq = u64_stats_fetch_begin(&cpu_stats->syncp); pkts = cpu_stats->pkts; bytes = cpu_stats->bytes; - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, seq)); total.pkts += pkts; total.bytes += bytes; } @@ -2857,6 +2857,43 @@ err1: return err; } +int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, + struct nft_expr_info *info) +{ + struct nlattr *tb[NFTA_EXPR_MAX + 1]; + const struct nft_expr_type *type; + int err; + + err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla, + nft_expr_policy, NULL); + if (err < 0) + return err; + + if (!tb[NFTA_EXPR_DATA]) + return -EINVAL; + + type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); + if (IS_ERR(type)) + return PTR_ERR(type); + + if (!type->inner_ops) + return -EOPNOTSUPP; + + err = nla_parse_nested_deprecated(info->tb, type->maxattr, + tb[NFTA_EXPR_DATA], + type->policy, NULL); + if (err < 0) + goto err_nla_parse; + + info->attr = nla; + info->ops = type->inner_ops; + + return 0; + +err_nla_parse: + return err; +} + static int nf_tables_newexpr(const struct nft_ctx *ctx, const struct nft_expr_info *expr_info, struct nft_expr *expr) @@ -5865,8 +5902,9 @@ static bool nft_setelem_valid_key_end(const struct nft_set *set, (NFT_SET_CONCAT | NFT_SET_INTERVAL)) { if (flags & NFT_SET_ELEM_INTERVAL_END) return false; - if (!nla[NFTA_SET_ELEM_KEY_END] && - !(flags & NFT_SET_ELEM_CATCHALL)) + + if (nla[NFTA_SET_ELEM_KEY_END] && + flags & NFT_SET_ELEM_CATCHALL) return false; } else { if (nla[NFTA_SET_ELEM_KEY_END]) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index cee3e4e905ec..709a736c301c 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -340,6 +340,8 @@ static struct nft_expr_type *nft_basic_types[] = { &nft_exthdr_type, &nft_last_type, &nft_counter_type, + &nft_objref_type, + &nft_inner_type, }; static struct nft_object_type *nft_basic_objects[] = { diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c new file mode 100644 index 000000000000..eae7caeff316 --- /dev/null +++ b/net/netfilter/nft_inner.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org> + */ + +#include <linux/kernel.h> +#include <linux/if_vlan.h> +#include <linux/init.h> +#include <linux/module.h> +#include 
<linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_meta.h> +#include <net/netfilter/nf_tables_offload.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <net/gre.h> +#include <net/geneve.h> +#include <net/ip.h> +#include <linux/icmpv6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +static DEFINE_PER_CPU(struct nft_inner_tun_ctx, nft_pcpu_tun_ctx); + +/* Same layout as nft_expr but it embeds the private expression data area. */ +struct __nft_expr { + const struct nft_expr_ops *ops; + union { + struct nft_payload payload; + struct nft_meta meta; + } __attribute__((aligned(__alignof__(u64)))); +}; + +enum { + NFT_INNER_EXPR_PAYLOAD, + NFT_INNER_EXPR_META, +}; + +struct nft_inner { + u8 flags; + u8 hdrsize; + u8 type; + u8 expr_type; + + struct __nft_expr expr; +}; + +static int nft_inner_parse_l2l3(const struct nft_inner *priv, + const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *ctx, u32 off) +{ + __be16 llproto, outer_llproto; + u32 nhoff, thoff; + + if (priv->flags & NFT_INNER_LL) { + struct vlan_ethhdr *veth, _veth; + struct ethhdr *eth, _eth; + u32 hdrsize; + + eth = skb_header_pointer(pkt->skb, off, sizeof(_eth), &_eth); + if (!eth) + return -1; + + switch (eth->h_proto) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + llproto = eth->h_proto; + hdrsize = sizeof(_eth); + break; + case htons(ETH_P_8021Q): + veth = skb_header_pointer(pkt->skb, off, sizeof(_veth), &_veth); + if (!veth) + return -1; + + outer_llproto = veth->h_vlan_encapsulated_proto; + llproto = veth->h_vlan_proto; + hdrsize = sizeof(_veth); + break; + default: + return -1; + } + + ctx->inner_lloff = off; + ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL; + off += hdrsize; + } else { + struct iphdr *iph; + u32 _version; + + iph = skb_header_pointer(pkt->skb, off, sizeof(_version), &_version); + if (!iph) + return -1; + + switch (iph->version) { + case 4: + llproto = htons(ETH_P_IP); + break; + case 6: + llproto = htons(ETH_P_IPV6); + break; + default: + return -1; + } + } + + ctx->llproto = llproto; + if (llproto == htons(ETH_P_8021Q)) + llproto = outer_llproto; + + nhoff = off; + + switch (llproto) { + case htons(ETH_P_IP): { + struct iphdr *iph, _iph; + + iph = skb_header_pointer(pkt->skb, nhoff, sizeof(_iph), &_iph); + if (!iph) + return -1; + + if (iph->ihl < 5 || iph->version != 4) + return -1; + + ctx->inner_nhoff = nhoff; + ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; + + thoff = nhoff + (iph->ihl * 4); + if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) { + ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; + ctx->inner_thoff = thoff; + ctx->l4proto = iph->protocol; + } + } + break; + case htons(ETH_P_IPV6): { + struct ipv6hdr *ip6h, _ip6h; + int fh_flags = IP6_FH_F_AUTH; + unsigned short fragoff; + int l4proto; + + ip6h = skb_header_pointer(pkt->skb, nhoff, sizeof(_ip6h), &_ip6h); + if (!ip6h) + return -1; + + if (ip6h->version != 6) + return -1; + + ctx->inner_nhoff = nhoff; + ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; + + thoff = nhoff; + l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags); + if (l4proto < 0 || thoff > U16_MAX) + return -1; + + if (fragoff == 0) { + thoff = nhoff + sizeof(_ip6h); + ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; + ctx->inner_thoff = thoff; + ctx->l4proto = l4proto; + } + } + break; + default: + return -1; + } + + return 0; +} + +static int nft_inner_parse_tunhdr(const struct nft_inner *priv, + const struct nft_pktinfo *pkt, + struct
nft_inner_tun_ctx *ctx, u32 *off) +{ + if (pkt->tprot == IPPROTO_GRE) { + ctx->inner_tunoff = pkt->thoff; + ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; + return 0; + } + + if (pkt->tprot != IPPROTO_UDP) + return -1; + + ctx->inner_tunoff = *off; + ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; + *off += priv->hdrsize; + + switch (priv->type) { + case NFT_INNER_GENEVE: { + struct genevehdr *gnvh, _gnvh; + + gnvh = skb_header_pointer(pkt->skb, pkt->inneroff, + sizeof(_gnvh), &_gnvh); + if (!gnvh) + return -1; + + *off += gnvh->opt_len * 4; + } + break; + default: + break; + } + + return 0; +} + +static int nft_inner_parse(const struct nft_inner *priv, + struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *tun_ctx) +{ + struct nft_inner_tun_ctx ctx = {}; + u32 off = pkt->inneroff; + + if (priv->flags & NFT_INNER_HDRSIZE && + nft_inner_parse_tunhdr(priv, pkt, &ctx, &off) < 0) + return -1; + + if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) { + if (nft_inner_parse_l2l3(priv, pkt, &ctx, off) < 0) + return -1; + } else if (priv->flags & NFT_INNER_TH) { + ctx.inner_thoff = off; + ctx.flags |= NFT_PAYLOAD_CTX_INNER_TH; + } + + *tun_ctx = ctx; + tun_ctx->type = priv->type; + pkt->flags |= NFT_PKTINFO_INNER_FULL; + + return 0; +} + +static bool nft_inner_parse_needed(const struct nft_inner *priv, + const struct nft_pktinfo *pkt, + const struct nft_inner_tun_ctx *tun_ctx) +{ + if (!(pkt->flags & NFT_PKTINFO_INNER_FULL)) + return true; + + if (priv->type != tun_ctx->type) + return true; + + return false; +} + +static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_inner_tun_ctx *tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); + const struct nft_inner *priv = nft_expr_priv(expr); + + if (nft_payload_inner_offset(pkt) < 0) + goto err; + + if (nft_inner_parse_needed(priv, pkt, tun_ctx) && + nft_inner_parse(priv, (struct nft_pktinfo *)pkt, tun_ctx) < 0) + goto err; + + switch (priv->expr_type) { + case NFT_INNER_EXPR_PAYLOAD: + nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, tun_ctx); + break; + case NFT_INNER_EXPR_META: + nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, tun_ctx); + break; + default: + WARN_ON_ONCE(1); + goto err; + } + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = { + [NFTA_INNER_NUM] = { .type = NLA_U32 }, + [NFTA_INNER_FLAGS] = { .type = NLA_U32 }, + [NFTA_INNER_HDRSIZE] = { .type = NLA_U32 }, + [NFTA_INNER_TYPE] = { .type = NLA_U32 }, + [NFTA_INNER_EXPR] = { .type = NLA_NESTED }, +}; + +struct nft_expr_info { + const struct nft_expr_ops *ops; + const struct nlattr *attr; + struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; +}; + +static int nft_inner_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_inner *priv = nft_expr_priv(expr); + u32 flags, hdrsize, type, num; + struct nft_expr_info expr_info; + int err; + + if (!tb[NFTA_INNER_FLAGS] || + !tb[NFTA_INNER_HDRSIZE] || + !tb[NFTA_INNER_TYPE] || + !tb[NFTA_INNER_EXPR]) + return -EINVAL; + + flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS])); + if (flags & ~NFT_INNER_MASK) + return -EOPNOTSUPP; + + num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM])); + if (num != 0) + return -EOPNOTSUPP; + + hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE])); + type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE])); + + if (type > U8_MAX) + return -EINVAL; + + if (flags & NFT_INNER_HDRSIZE) { + if (hdrsize == 0 || hdrsize > 64) + return 
-EOPNOTSUPP; + } + + priv->flags = flags; + priv->hdrsize = hdrsize; + priv->type = type; + + err = nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info); + if (err < 0) + return err; + + priv->expr.ops = expr_info.ops; + + if (!strcmp(expr_info.ops->type->name, "payload")) + priv->expr_type = NFT_INNER_EXPR_PAYLOAD; + else if (!strcmp(expr_info.ops->type->name, "meta")) + priv->expr_type = NFT_INNER_EXPR_META; + else + return -EINVAL; + + err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr, + (const struct nlattr * const*)expr_info.tb); + if (err < 0) + return err; + + return 0; +} + +static int nft_inner_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_inner *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)) || + nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)) || + nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)) || + nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize))) + goto nla_put_failure; + + if (nft_expr_dump(skb, NFTA_INNER_EXPR, + (struct nft_expr *)&priv->expr) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_expr_ops nft_inner_ops = { + .type = &nft_inner_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_inner)), + .eval = nft_inner_eval, + .init = nft_inner_init, + .dump = nft_inner_dump, +}; + +struct nft_expr_type nft_inner_type __read_mostly = { + .name = "inner", + .ops = &nft_inner_ops, + .policy = nft_inner_policy, + .maxattr = NFTA_INNER_MAX, + .owner = THIS_MODULE, +}; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 55d2d49c3425..8c39adeebb5c 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -831,9 +831,71 @@ nft_meta_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-EINVAL); } +static int nft_meta_inner_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_PROTOCOL: + len = sizeof(u16); + break; + case NFT_META_L4PROTO: + len = sizeof(u32); + break; + default: + return -EOPNOTSUPP; + } + priv->len = len; + + return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); +} + +void nft_meta_inner_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *tun_ctx) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + u32 *dest = ®s->data[priv->dreg]; + + switch (priv->key) { + case NFT_META_PROTOCOL: + nft_reg_store16(dest, (__force u16)tun_ctx->llproto); + break; + case NFT_META_L4PROTO: + if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_TH)) + goto err; + + nft_reg_store8(dest, tun_ctx->l4proto); + break; + default: + WARN_ON_ONCE(1); + goto err; + } + return; + +err: + regs->verdict.code = NFT_BREAK; +} +EXPORT_SYMBOL_GPL(nft_meta_inner_eval); + +static const struct nft_expr_ops nft_meta_inner_ops = { + .type = &nft_meta_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .init = nft_meta_inner_init, + .dump = nft_meta_get_dump, + /* direct call to nft_meta_inner_eval(). 
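The inner ops above intentionally carry no .eval hook: nft_inner_eval() owns the per-cpu tunnel context and dispatches to the embedded child expression by expr_type rather than through the generic ops table, so the child never runs outside a prepared context. The shape of that design, reduced to a compilable user-space model with kernel types elided:

#include <stdio.h>

enum { INNER_EXPR_PAYLOAD, INNER_EXPR_META };

struct inner_model {
	int expr_type; /* decided once at init time */
	/* the embedded child expression data would live here */
};

static void payload_inner_eval(void) { puts("read inner payload"); }
static void meta_inner_eval(void)    { puts("read inner meta"); }

static void inner_eval(const struct inner_model *priv)
{
	/* Direct calls: no ops->eval indirection for the child, mirroring
	 * the "direct call to nft_*_inner_eval()" comments in the diff. */
	switch (priv->expr_type) {
	case INNER_EXPR_PAYLOAD:
		payload_inner_eval();
		break;
	case INNER_EXPR_META:
		meta_inner_eval();
		break;
	}
}

int main(void)
{
	struct inner_model m = { .expr_type = INNER_EXPR_META };

	inner_eval(&m);
	return 0;
}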
*/ +}; + struct nft_expr_type nft_meta_type __read_mostly = { .name = "meta", .select_ops = nft_meta_select_ops, + .inner_ops = &nft_meta_inner_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 5d8d91b3904d..74e0eea4abac 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -82,7 +82,6 @@ static void nft_objref_activate(const struct nft_ctx *ctx, obj->use++; } -static struct nft_expr_type nft_objref_type; static const struct nft_expr_ops nft_objref_ops = { .type = &nft_objref_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)), @@ -195,7 +194,6 @@ static void nft_objref_map_destroy(const struct nft_ctx *ctx, nf_tables_destroy_set(ctx, priv->set); } -static struct nft_expr_type nft_objref_type; static const struct nft_expr_ops nft_objref_map_ops = { .type = &nft_objref_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), @@ -233,28 +231,10 @@ static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = { [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 }, }; -static struct nft_expr_type nft_objref_type __read_mostly = { +struct nft_expr_type nft_objref_type __read_mostly = { .name = "objref", .select_ops = nft_objref_select_ops, .policy = nft_objref_policy, .maxattr = NFTA_OBJREF_MAX, .owner = THIS_MODULE, }; - -static int __init nft_objref_module_init(void) -{ - return nft_register_expr(&nft_objref_type); -} - -static void __exit nft_objref_module_exit(void) -{ - nft_unregister_expr(&nft_objref_type); -} - -module_init(nft_objref_module_init); -module_exit(nft_objref_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); -MODULE_ALIAS_NFT_EXPR("objref"); -MODULE_DESCRIPTION("nftables stateful object reference module"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 088244f9d838..9d2ac764a14c 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -19,6 +19,7 @@ /* For layer 4 checksum field offset. 
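The GRE support added below derives the inner-header offset from the base header plus whichever optional fields the flag bits announce (RFC 2784/2890 layout). The same arithmetic in a standalone, testable form; field sizes are hard-coded where the kernel uses sizeof_field(), and the flag values mirror the on-wire bits with host byte order assumed:

#include <stdint.h>
#include <stdio.h>

#define GRE_CSUM 0x8000
#define GRE_KEY  0x2000
#define GRE_SEQ  0x1000

static uint32_t gre_hdr_len(uint16_t flags)
{
	uint32_t len = 4; /* base header: flags/version + protocol */

	if (flags & GRE_CSUM)
		len += 4; /* checksum + reserved1 */
	if (flags & GRE_KEY)
		len += 4;
	if (flags & GRE_SEQ)
		len += 4;
	return len;
}

int main(void)
{
	printf("%u\n", gre_hdr_len(GRE_KEY | GRE_SEQ)); /* prints 12 */
	return 0;
}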
*/ #include <linux/tcp.h> #include <linux/udp.h> +#include <net/gre.h> #include <linux/icmpv6.h> #include <linux/ip.h> #include <linux/ipv6.h> @@ -100,6 +101,40 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt) pkt->inneroff = thoff + __tcp_hdrlen(th); } break; + case IPPROTO_GRE: { + u32 offset = sizeof(struct gre_base_hdr), version; + struct gre_base_hdr *gre, _gre; + + gre = skb_header_pointer(pkt->skb, thoff, sizeof(_gre), &_gre); + if (!gre) + return -1; + + version = gre->flags & GRE_VERSION; + switch (version) { + case GRE_VERSION_0: + if (gre->flags & GRE_ROUTING) + return -1; + + if (gre->flags & GRE_CSUM) { + offset += sizeof_field(struct gre_full_hdr, csum) + + sizeof_field(struct gre_full_hdr, reserved1); + } + if (gre->flags & GRE_KEY) + offset += sizeof_field(struct gre_full_hdr, key); + + if (gre->flags & GRE_SEQ) + offset += sizeof_field(struct gre_full_hdr, seq); + break; + default: + return -1; + } + + pkt->inneroff = thoff + offset; + } + break; + case IPPROTO_IPIP: + pkt->inneroff = thoff; + break; default: return -1; } @@ -109,7 +144,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt) return 0; } -static int nft_payload_inner_offset(const struct nft_pktinfo *pkt) +int nft_payload_inner_offset(const struct nft_pktinfo *pkt) { if (!(pkt->flags & NFT_PKTINFO_INNER) && __nft_payload_inner_offset((struct nft_pktinfo *)pkt) < 0) @@ -552,6 +587,92 @@ const struct nft_expr_ops nft_payload_fast_ops = { .offload = nft_payload_offload, }; +void nft_payload_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *tun_ctx) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + u32 *dest = ®s->data[priv->dreg]; + int offset; + + if (priv->len % NFT_REG32_SIZE) + dest[priv->len / NFT_REG32_SIZE] = 0; + + switch (priv->base) { + case NFT_PAYLOAD_TUN_HEADER: + if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_TUN)) + goto err; + + offset = tun_ctx->inner_tunoff; + break; + case NFT_PAYLOAD_LL_HEADER: + if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_LL)) + goto err; + + offset = tun_ctx->inner_lloff; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_NH)) + goto err; + + offset = tun_ctx->inner_nhoff; + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_TH)) + goto err; + + offset = tun_ctx->inner_thoff; + break; + default: + WARN_ON_ONCE(1); + goto err; + } + offset += priv->offset; + + if (skb_copy_bits(skb, offset, dest, priv->len) < 0) + goto err; + + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_payload_inner_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload *priv = nft_expr_priv(expr); + u32 base; + + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (base) { + case NFT_PAYLOAD_TUN_HEADER: + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return -EOPNOTSUPP; + } + + priv->base = base; + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + + return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); +} + +static const struct nft_expr_ops nft_payload_inner_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .init = 
nft_payload_inner_init, + .dump = nft_payload_dump, + /* direct call to nft_payload_inner_eval(). */ +}; + static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum) { *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum)); @@ -665,6 +786,16 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src, return 0; } +struct nft_payload_set { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + u8 sreg; + u8 csum_type; + u8 csum_offset; + u8 csum_flags; +}; + static void nft_payload_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -885,6 +1016,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, struct nft_expr_type nft_payload_type __read_mostly = { .name = "payload", .select_ops = nft_payload_select_ops, + .inner_ops = &nft_payload_inner_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 203e24ae472c..b26c1dcfc27b 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -34,7 +34,7 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) switch (info->mode) { case XT_STATISTIC_MODE_RANDOM: - if ((prandom_u32() & 0x7FFFFFFF) < info->u.random.probability) + if ((get_random_u32() & 0x7FFFFFFF) < info->u.random.probability) ret = !ret; break; case XT_STATISTIC_MODE_NTH: diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a662e8a5ff84..b10d5e50b99d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -812,6 +812,17 @@ static int netlink_release(struct socket *sock) } sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); + + /* Because struct net might disappear soon, do not keep a pointer. */ + if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { + __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); + /* Because of deferred_put_nlk_sk and use of work queue, + * it is possible netns will be freed before this socket. + */ + sock_net_set(sk, &init_net); + __netns_tracker_alloc(&init_net, &sk->ns_tracker, + false, GFP_KERNEL); + } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -2488,19 +2499,24 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, flags |= NLM_F_ACK_TLVS; skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); - if (!skb) { - NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; - sk_error_report(NETLINK_CB(in_skb).sk); - return; - } + if (!skb) + goto err_bad_put; rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - NLMSG_ERROR, payload, flags); + NLMSG_ERROR, sizeof(*errmsg), flags); + if (!rep) + goto err_bad_put; errmsg = nlmsg_data(rep); errmsg->error = err; - unsafe_memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) - ? nlh->nlmsg_len : sizeof(*nlh), - /* Bounds checked by the skb layer. 
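nft_payload_inner_eval() in the hunk just above zeroes the last destination register before copying a length that is not a multiple of NFT_REG32_SIZE, so the bytes the copy does not reach are still well defined when a later comparison reads the whole register. The pattern in isolation, as a userspace sketch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REG32_SIZE 4

/* Copy 'len' payload bytes into 32-bit registers, pre-zeroing the
 * final, partially filled register first.
 */
static void copy_to_regs(uint32_t *dest, const uint8_t *src, size_t len)
{
	if (len % REG32_SIZE)
		dest[len / REG32_SIZE] = 0;
	memcpy(dest, src, len);
}

int main(void)
{
	uint32_t regs[2] = { 0xdeadbeef, 0xdeadbeef };
	const uint8_t payload[5] = { 1, 2, 3, 4, 5 };

	copy_to_regs(regs, payload, sizeof(payload));
	printf("%08x %08x\n", regs[0], regs[1]);	/* tail of regs[1] is zero */
	return 0;
}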
*/); + errmsg->msg = *nlh; + + if (!(flags & NLM_F_CAPPED)) { + if (!nlmsg_append(skb, nlmsg_len(nlh))) + goto err_bad_put; + + memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh), + nlmsg_len(nlh)); + } if (tlvlen) netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack); @@ -2508,6 +2524,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, nlmsg_end(skb, rep); nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid); + + return; + +err_bad_put: + NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; + sk_error_report(NETLINK_CB(in_skb).sk); } EXPORT_SYMBOL(netlink_ack); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 39b7c00e4cef..3e16527beb91 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -78,10 +78,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; +/* We need the last attribute with non-zero ID therefore a 2-entry array */ +static struct nla_policy genl_policy_reject_all[] = { + { .type = NLA_REJECT }, + { .type = NLA_REJECT }, +}; + static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id); +static void +genl_op_fill_in_reject_policy(const struct genl_family *family, + struct genl_ops *op) +{ + BUILD_BUG_ON(ARRAY_SIZE(genl_policy_reject_all) - 1 != 1); + + if (op->policy || op->cmd < family->resv_start_op) + return; + + op->policy = genl_policy_reject_all; + op->maxattr = 1; +} + static const struct genl_family *genl_family_find_byid(unsigned int id) { return idr_find(&genl_fam_idr, id); @@ -113,6 +132,8 @@ static void genl_op_from_full(const struct genl_family *family, op->maxattr = family->maxattr; if (!op->policy) op->policy = family->policy; + + genl_op_fill_in_reject_policy(family, op); } static int genl_get_cmd_full(u32 cmd, const struct genl_family *family, @@ -142,6 +163,8 @@ static void genl_op_from_small(const struct genl_family *family, op->maxattr = family->maxattr; op->policy = family->policy; + + genl_op_fill_in_reject_policy(family, op); } static int genl_get_cmd_small(u32 cmd, const struct genl_family *family, @@ -357,6 +380,8 @@ static int genl_validate_ops(const struct genl_family *family) genl_get_cmd_by_index(i, family, &op); if (op.dumpit == NULL && op.doit == NULL) return -EINVAL; + if (WARN_ON(op.cmd >= family->resv_start_op && op.validate)) + return -EINVAL; for (j = i + 1; j < genl_get_cmd_cnt(family); j++) { struct genl_ops op2; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6a193cce2a75..dbe5258e13ff 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -24,6 +24,7 @@ #include <linux/sched.h> #include <linux/bitops.h> #include <linux/skbuff.h> +#include <linux/kcov.h> #include "../nfc.h" #include <net/nfc/nci.h> @@ -1472,6 +1473,7 @@ static void nci_tx_work(struct work_struct *work) skb = skb_dequeue(&ndev->tx_q); if (!skb) return; + kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Check if data flow control is used */ if (atomic_read(&conn_info->credits_cnt) != @@ -1487,6 +1489,7 @@ static void nci_tx_work(struct work_struct *work) mod_timer(&ndev->data_timer, jiffies + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + kcov_remote_stop(); } } @@ -1497,7 +1500,8 @@ static void nci_rx_work(struct work_struct *work) struct nci_dev *ndev = container_of(work, struct nci_dev, rx_work); struct sk_buff *skb; - while ((skb = skb_dequeue(&ndev->rx_q))) { + for (; (skb = skb_dequeue(&ndev->rx_q)); kcov_remote_stop()) { +
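The genetlink change above gives any command at or beyond resv_start_op that declares no attribute policy a default policy that rejects everything, so newly reserved commands cannot silently accept attributes. Condensed to its decision logic (a sketch that mirrors the hunk rather than adding behavior; names shortened):

static const struct nla_policy reject_all[] = {
	{ .type = NLA_REJECT },
	{ .type = NLA_REJECT },	/* index 1 backs maxattr == 1 */
};

/* Resolve the effective policy for an op: reserved-range commands
 * without an explicit policy fall back to rejecting every attribute.
 */
static void fill_in_reject_policy(const struct genl_family *family,
				  struct genl_ops *op)
{
	if (op->policy || op->cmd < family->resv_start_op)
		return;

	op->policy = reject_all;
	op->maxattr = ARRAY_SIZE(reject_all) - 1;
}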
kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Send copy to sniffer */ nfc_send_to_raw_sock(ndev->nfc_dev, skb, @@ -1551,6 +1555,7 @@ static void nci_cmd_work(struct work_struct *work) if (!skb) return; + kcov_remote_start_common(skb_get_kcov_handle(skb)); atomic_dec(&ndev->cmd_cnt); pr_debug("NCI TX: MT=cmd, PBF=%d, GID=0x%x, OID=0x%x, plen=%d\n", @@ -1563,6 +1568,7 @@ static void nci_cmd_work(struct work_struct *work) mod_timer(&ndev->cmd_timer, jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT)); + kcov_remote_stop(); } } diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 78c4b6addf15..de175318a3a0 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -14,6 +14,7 @@ #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include <linux/nfc.h> +#include <linux/kcov.h> struct nci_data { u8 conn_id; @@ -409,7 +410,8 @@ static void nci_hci_msg_rx_work(struct work_struct *work) const struct nci_hcp_message *message; u8 pipe, type, instruction; - while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { + for (; (skb = skb_dequeue(&hdev->msg_rx_queue)); kcov_remote_stop()) { + kcov_remote_start_common(skb_get_kcov_handle(skb)); pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 8dd569765f96..5125392bb68e 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -12,6 +12,7 @@ #include <net/tcp_states.h> #include <linux/nfc.h> #include <linux/export.h> +#include <linux/kcov.h> #include "nfc.h" @@ -189,6 +190,7 @@ static void rawsock_tx_work(struct work_struct *work) } skb = skb_dequeue(&sk->sk_write_queue); + kcov_remote_start_common(skb_get_kcov_handle(skb)); sock_hold(sk); rc = nfc_data_exchange(dev, target_idx, skb, @@ -197,6 +199,7 @@ static void rawsock_tx_work(struct work_struct *work) rawsock_report_error(sk, rc); sock_put(sk); } + kcov_remote_stop(); } static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 868db4669a29..ca3ebfdb3023 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1033,7 +1033,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb, actions = nla_next(sample_arg, &rem); if ((arg->probability != U32_MAX) && - (!arg->probability || prandom_u32() > arg->probability)) { + (!arg->probability || get_random_u32() > arg->probability)) { if (last) consume_skb(skb); return 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index c8a9075ddd0a..f95b716ea96d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -716,9 +716,9 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, percpu_stats = per_cpu_ptr(dp->stats_percpu, i); do { - start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); + start = u64_stats_fetch_begin(&percpu_stats->syncp); local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&percpu_stats->syncp, start)); stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; @@ -1616,7 +1616,8 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, if (IS_ERR(dp)) return; - WARN(dp->user_features, "Dropping previously announced user features\n"); + pr_warn("%s: Dropping previously announced user features\n", + ovs_dp_name(dp)); dp->user_features = 0; } diff --git a/net/openvswitch/flow_netlink.c 
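The kcov annotations threaded through the NCI and rawsock workers above all follow one pattern: bracket the handling of each dequeued skb with kcov_remote_start_common()/kcov_remote_stop(), keyed by the handle stored in the skb, so coverage collected in kernel worker threads is credited back to the task (typically a fuzzer) that injected the packet. The bare pattern as a sketch:

#include <linux/kcov.h>
#include <linux/skbuff.h>

/* Sketch: attribute work-queue processing of queued skbs to the
 * task that originated each skb.
 */
static void example_rx_work(struct sk_buff_head *queue)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(queue))) {
		kcov_remote_start_common(skb_get_kcov_handle(skb));
		/* ... process skb ... */
		consume_skb(skb);
		kcov_remote_stop();
	}
}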
b/net/openvswitch/flow_netlink.c index 4a07ab094a84..ead5418c126e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2309,7 +2309,7 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size) WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); - sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); + sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d4a2db0b2299..0a0e4c283f02 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -205,9 +205,9 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma) stats = per_cpu_ptr(ma->masks_usage_stats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); counter = stats->usage_cntrs[i]; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); ma->masks_usage_zero_cntr[i] += counter; } @@ -1136,10 +1136,9 @@ void ovs_flow_masks_rebalance(struct flow_table *table) stats = per_cpu_ptr(ma->masks_usage_stats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); counter = stats->usage_cntrs[i]; - } while (u64_stats_fetch_retry_irq(&stats->syncp, - start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); masks_and_count[i].counter += counter; } diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 89a8e1501809..b10e1602c6b1 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -91,7 +91,7 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); if (err < 0) { - rtnl_delete_link(dev); + rtnl_delete_link(dev, 0, NULL); rtnl_unlock(); ovs_vport_free(vport); goto error; diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index e6b5e76a962a..4014c9b5eb79 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -57,7 +57,7 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); if (err < 0) { - rtnl_delete_link(dev); + rtnl_delete_link(dev, 0, NULL); rtnl_unlock(); ovs_vport_free(vport); return ERR_PTR(err); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 2f61d5bdce1a..903537a5da22 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -172,7 +172,7 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) * if it's not already shutting down. 
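The openvswitch hunks above, like the bridge and vlan ones earlier, only drop the _irq suffix from the u64_stats reader helpers; the snapshot protocol itself is unchanged. Its shape, as a sketch with an assumed counter struct:

#include <linux/u64_stats_sync.h>

struct example_pcpu_stats {
	u64_stats_t packets;
	u64_stats_t bytes;
	struct u64_stats_sync syncp;
};

/* Snapshot one CPU's counters consistently: begin/retry detects a
 * concurrent writer and simply re-reads.
 */
static void stats_snapshot(const struct example_pcpu_stats *s,
			   u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&s->syncp);
		*packets = u64_stats_read(&s->packets);
		*bytes = u64_stats_read(&s->bytes);
	} while (u64_stats_fetch_retry(&s->syncp, start));
}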
*/ if (vport->dev->reg_state == NETREG_REGISTERED) - rtnl_delete_link(vport->dev); + rtnl_delete_link(vport->dev, 0, NULL); netdev_put(vport->dev, &vport->dev_tracker); vport->dev = NULL; rtnl_unlock(); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 188e9c1360a1..0b881b043bcf 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -120,7 +120,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); if (err < 0) { - rtnl_delete_link(dev); + rtnl_delete_link(dev, 0, NULL); rtnl_unlock(); ovs_vport_free(vport); goto error; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d3f6db350de7..44f20cf8a0c0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1350,7 +1350,7 @@ static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) if (READ_ONCE(history[i]) == rxhash) count++; - victim = prandom_u32() % ROLLOVER_HLEN; + victim = prandom_u32_max(ROLLOVER_HLEN); /* Avoid dirtying the cache line if possible */ if (READ_ONCE(history[victim]) != rxhash) @@ -1777,6 +1777,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; + match->prot_hook.ignore_outgoing = type_flags & PACKET_FANOUT_FLAG_IGNORE_OUTGOING; list_add(&match->list, &fanout_list); } err = -EINVAL; @@ -3277,7 +3278,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality @@ -3288,8 +3289,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. 
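The af_packet hunk above copies PACKET_FANOUT_FLAG_IGNORE_OUTGOING from the fanout arguments into the group's prot_hook, so the whole group stops seeing locally transmitted frames. From userspace the flag rides in the upper 16 bits of the PACKET_FANOUT option value; group id 42 and the hash mode below are arbitrary choices for the example:

#include <sys/socket.h>
#include <linux/if_packet.h>

/* Join fanout group 42 in hash mode, ignoring outgoing packets. */
static int join_fanout(int fd)
{
	int val = 42 | ((PACKET_FANOUT_HASH |
			 PACKET_FANOUT_FLAG_IGNORE_OUTGOING) << 16);

	return setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val));
}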
*/ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } @@ -3561,11 +3562,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); diff --git a/net/rds/bind.c b/net/rds/bind.c index 5b5fb4ca8d3e..97a29172a8ee 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -104,7 +104,7 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr, return -EINVAL; last = rover; } else { - rover = max_t(u16, prandom_u32(), 2); + rover = max_t(u16, get_random_u16(), 2); last = rover - 1; } diff --git a/net/rds/message.c b/net/rds/message.c index 44dbc612ef54..b47e4f0a1639 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -366,7 +366,6 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * struct scatterlist *sg; int ret = 0; int length = iov_iter_count(from); - int total_copied = 0; struct rds_msg_zcopy_info *info; rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); @@ -404,7 +403,6 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * ret = -EFAULT; goto err; } - total_copied += copied; length -= copied; sg_set_page(sg, pages, copied, start); rm->data.op_nents++; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 4444fd82b66d..c5b86066ff66 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -503,6 +503,9 @@ bool rds_tcp_tune(struct socket *sock) release_sock(sk); return false; } + /* Update ns_tracker to current stack trace and refcounted tracker */ + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index abe1bcc5c797..62d682b96b88 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -25,7 +25,7 @@ static struct tc_action_ops act_gact_ops; static int gact_net_rand(struct tcf_gact *gact) { smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ - if (prandom_u32() % gact->tcfg_pval) + if (prandom_u32_max(gact->tcfg_pval)) return gact->tcf_action; return gact->tcfg_paction; } diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 5ba36f70e3a1..7a25477f5d99 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -168,7 +168,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, psample_group = rcu_dereference_bh(s->psample_group); /* randomly sample packets according to rate */ - if (psample_group && (prandom_u32() % s->rate == 0)) { + if (psample_group && (prandom_u32_max(s->rate) == 0)) { if (!skb_at_tc_ingress(skb)) { md.in_ifindex = skb->skb_iif; md.out_ifindex = skb->dev->ifindex; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 7f598784fd30..1710780c908a 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -148,6 +148,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, } if (tb[TCA_SKBEDIT_QUEUE_MAPPING] 
!= NULL) { + if (is_tcf_skbedit_ingress(act_flags) && + !(act_flags & TCA_ACT_FLAGS_SKIP_SW)) { + NL_SET_ERR_MSG_MOD(extack, "\"queue_mapping\" option on receive side is hardware only, use skip_sw"); + return -EOPNOTSUPP; + } flags |= SKBEDIT_F_QUEUE_MAPPING; queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); } @@ -374,9 +379,12 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data } else if (is_tcf_skbedit_priority(act)) { entry->id = FLOW_ACTION_PRIORITY; entry->priority = tcf_skbedit_priority(act); - } else if (is_tcf_skbedit_queue_mapping(act)) { - NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used"); + } else if (is_tcf_skbedit_tx_queue_mapping(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used on transmit side"); return -EOPNOTSUPP; + } else if (is_tcf_skbedit_rx_queue_mapping(act)) { + entry->id = FLOW_ACTION_RX_QUEUE_MAPPING; + entry->rx_queue = tcf_skbedit_rx_queue_mapping(act); } else if (is_tcf_skbedit_inheritdsfield(act)) { NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used"); return -EOPNOTSUPP; @@ -394,6 +402,8 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data fl_action->id = FLOW_ACTION_PTYPE; else if (is_tcf_skbedit_priority(act)) fl_action->id = FLOW_ACTION_PRIORITY; + else if (is_tcf_skbedit_rx_queue_mapping(act)) + fl_action->id = FLOW_ACTION_RX_QUEUE_MAPPING; else return -EOPNOTSUPP; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 50566db45949..23d1cfa4f58c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1953,6 +1953,11 @@ static void tfilter_put(struct tcf_proto *tp, void *fh) tp->ops->put(tp, fh); } +static bool is_qdisc_ingress(__u32 classid) +{ + return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS)); +} + static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -2144,6 +2149,8 @@ replay: flags |= TCA_ACT_FLAGS_REPLACE; if (!rtnl_held) flags |= TCA_ACT_FLAGS_NO_RTNL; + if (is_qdisc_ingress(parent)) + flags |= TCA_ACT_FLAGS_AT_INGRESS; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, flags, extack); if (err == 0) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c98af0ada706..4a27dfb1ba0f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1099,12 +1099,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, skip: if (!ingress) { - notify_and_destroy(net, skb, n, classid, - rtnl_dereference(dev->qdisc), new); + old = rtnl_dereference(dev->qdisc); if (new && !new->ops->attach) qdisc_refcount_inc(new); rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc); + notify_and_destroy(net, skb, n, classid, old, new); + if (new && new->ops->attach) new->ops->attach(new); } else { diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 55c6879d2c7e..3ed0c3342189 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -573,7 +573,7 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, /* Simple BLUE implementation. Lack of ECN is deliberate. 
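The qdisc_graft() reorder above changes the sequence to: read the old qdisc under RTNL, publish the replacement with rcu_assign_pointer(), and only then notify userspace and destroy the old object, so no window exists in which the notification path can observe the qdisc that is being torn down. As a generic publish-then-release sketch (struct foo and foo_notify_and_destroy() are hypothetical stand-ins):

/* Hypothetical object type and teardown helper, for illustration. */
struct foo;
void foo_notify_and_destroy(struct foo *old);

static void replace_published(struct foo __rcu **slot, struct foo *new)
{
	struct foo *old;

	old = rtnl_dereference(*slot);	/* writer side, RTNL held */
	rcu_assign_pointer(*slot, new);	/* readers now see 'new' */

	/* Notification and teardown happen only after the new object
	 * is globally visible.
	 */
	foo_notify_and_destroy(old);
}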
*/ if (vars->p_drop) - drop |= (prandom_u32() < vars->p_drop); + drop |= (get_random_u32() < vars->p_drop); /* Overload the drop_next field as an activity timeout */ if (!vars->count) @@ -2092,11 +2092,11 @@ retry: WARN_ON(host_load > CAKE_QUEUES); - /* The shifted prandom_u32() is a way to apply dithering to - * avoid accumulating roundoff errors + /* The get_random_u16() is a way to apply dithering to avoid + * accumulating roundoff errors */ flow->deficit += (b->flow_quantum * quantum_div[host_load] + - (prandom_u32() >> 16)) >> 16; + get_random_u16()) >> 16; list_move_tail(&flow->flowchain, &b->old_flows); goto retry; @@ -2224,8 +2224,12 @@ retry: static void cake_reset(struct Qdisc *sch) { + struct cake_sched_data *q = qdisc_priv(sch); u32 c; + if (!q->tins) + return; + for (c = 0; c < CAKE_MAX_TINS; c++) cake_clear_tin(sch, c); } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 99d318b60568..8c4fee063436 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -478,24 +478,26 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt, if (opt) { err = fq_codel_change(sch, opt, extack); if (err) - return err; + goto init_failure; } err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) - return err; + goto init_failure; if (!q->flows) { q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_codel_flow), GFP_KERNEL); - if (!q->flows) - return -ENOMEM; - + if (!q->flows) { + err = -ENOMEM; + goto init_failure; + } q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL); - if (!q->backlogs) - return -ENOMEM; - + if (!q->backlogs) { + err = -ENOMEM; + goto alloc_failure; + } for (i = 0; i < q->flows_cnt; i++) { struct fq_codel_flow *flow = q->flows + i; @@ -508,6 +510,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt, else sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; + +alloc_failure: + kvfree(q->flows); + q->flows = NULL; +init_failure: + q->flows_cnt = 0; + return err; } static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 18f4273a835b..fb00ac40ecb7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -171,7 +171,7 @@ static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) static void init_crandom(struct crndstate *state, unsigned long rho) { state->rho = rho; - state->last = prandom_u32(); + state->last = get_random_u32(); } /* get_crandom - correlated random number generator @@ -184,9 +184,9 @@ static u32 get_crandom(struct crndstate *state) unsigned long answer; if (!state || state->rho == 0) /* no correlation */ - return prandom_u32(); + return get_random_u32(); - value = prandom_u32(); + value = get_random_u32(); rho = (u64)state->rho + 1; answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; state->last = answer; @@ -200,7 +200,7 @@ static u32 get_crandom(struct crndstate *state) static bool loss_4state(struct netem_sched_data *q) { struct clgstate *clg = &q->clg; - u32 rnd = prandom_u32(); + u32 rnd = get_random_u32(); /* * Makes a comparison between rnd and the transition @@ -268,15 +268,15 @@ static bool loss_gilb_ell(struct netem_sched_data *q) switch (clg->state) { case GOOD_STATE: - if (prandom_u32() < clg->a1) + if (get_random_u32() < clg->a1) clg->state = BAD_STATE; - if (prandom_u32() < clg->a4) + if (get_random_u32() < clg->a4) return true; break; case BAD_STATE: - if (prandom_u32() < clg->a2) + if (get_random_u32() < clg->a2) clg->state = GOOD_STATE; - if (prandom_u32() > clg->a3) 
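netem's get_crandom(), converted above to get_random_u32(), produces correlated randomness by blending each fresh draw with the previous output; rho selects the blend ratio, and with rho == 0 the hunk's explicit shortcut just returns the fresh random value. The mixing arithmetic as a standalone sketch:

#include <stdint.h>

struct crnd {
	uint32_t last;
	uint32_t rho;	/* correlation strength, 0 .. 2^32-1 */
};

/* Blend a fresh 32-bit random draw with the previous output. */
static uint32_t crnd_next(struct crnd *s, uint32_t fresh)
{
	uint64_t rho = (uint64_t)s->rho + 1;
	uint64_t mix = fresh * ((1ull << 32) - rho) + (uint64_t)s->last * rho;

	s->last = (uint32_t)(mix >> 32);
	return s->last;
}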
+ if (get_random_u32() > clg->a3) return true; } @@ -513,8 +513,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto finish_segs; } - skb->data[prandom_u32() % skb_headlen(skb)] ^= - 1<<(prandom_u32() % 8); + skb->data[prandom_u32_max(skb_headlen(skb))] ^= + 1<<prandom_u32_max(8); } if (unlikely(sch->q.qlen >= sch->limit)) { @@ -632,7 +632,7 @@ static void get_slot_next(struct netem_sched_data *q, u64 now) if (!q->slot_dist) next_delay = q->slot_config.min_delay + - (prandom_u32() * + (get_random_u32() * (q->slot_config.max_delay - q->slot_config.min_delay) >> 32); else diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 974038ba6c7b..265c238047a4 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -72,7 +72,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, if (vars->accu_prob >= (MAX_PROB / 2) * 17) return true; - prandom_bytes(&rnd, 8); + get_random_bytes(&rnd, 8); if ((rnd >> BITS_PER_BYTE) < local_prob) { vars->accu_prob = 0; return true; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index e2389fa3cff8..1871a1c0224d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -379,7 +379,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto enqueue; } - r = prandom_u32() & SFB_MAX_PROB; + r = get_random_u16() & SFB_MAX_PROB; if (unlikely(r < p_min)) { if (unlikely(p_min > SFB_MAX_PROB / 2)) { @@ -455,7 +455,8 @@ static void sfb_reset(struct Qdisc *sch) { struct sfb_sched_data *q = qdisc_priv(sch); - qdisc_reset(q->qdisc); + if (likely(q->qdisc)) + qdisc_reset(q->qdisc); q->slot = 0; q->double_buffering = false; sfb_zero_all_buckets(q); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 3460abceba44..63ba5551c13f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -226,8 +226,7 @@ static struct sctp_association *sctp_association_init( /* Create an output queue. */ sctp_outq_init(asoc, &asoc->outqueue); - if (!sctp_ulpq_init(&asoc->ulpq, asoc)) - goto fail_init; + sctp_ulpq_init(&asoc->ulpq, asoc); if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) goto stream_free; @@ -277,7 +276,6 @@ static struct sctp_association *sctp_association_init( stream_free: sctp_stream_free(&asoc->stream); -fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); return NULL; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 171f1a35d205..3e83963d1b8a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5098,13 +5098,17 @@ static void sctp_destroy_sock(struct sock *sk) } /* Triggered when there are no references on the socket anymore */ -static void sctp_destruct_sock(struct sock *sk) +static void sctp_destruct_common(struct sock *sk) { struct sctp_sock *sp = sctp_sk(sk); /* Free up the HMAC transform. 
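Most conversions in this section replace 'prandom_u32() % n' with prandom_u32_max(n); the netem corrupt hunk above is one example. The helper maps the full 32-bit random range onto [0, n) with a single multiply and shift, avoiding the division and, for small n, the bias of the modulo; the result is near-uniform rather than perfectly uniform, which is fine for these non-cryptographic users. A sketch of the mapping:

#include <stdint.h>

/* Map a full-range 32-bit random value onto [0, ceil). */
static uint32_t bounded_random(uint32_t rand32, uint32_t ceil)
{
	return (uint32_t)(((uint64_t)rand32 * ceil) >> 32);
}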
*/ crypto_free_shash(sp->hmac); +} +static void sctp_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); inet_sock_destruct(sk); } @@ -8319,7 +8323,7 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - rover = prandom_u32() % remaining + low; + rover = prandom_u32_max(remaining) + low; do { rover++; @@ -9427,7 +9431,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, sctp_sk(newsk)->reuse = sp->reuse; newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = sctp_destruct_sock; + newsk->sk_destruct = sk->sk_destruct; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; @@ -9448,7 +9452,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->inet_id = prandom_u32(); + newinet->inet_id = get_random_u16(); newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; @@ -9662,11 +9666,20 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) -#include <net/transp_v6.h> -static void sctp_v6_destroy_sock(struct sock *sk) +static void sctp_v6_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +static int sctp_v6_init_sock(struct sock *sk) { - sctp_destroy_sock(sk); - inet6_destroy_sock(sk); + int ret = sctp_init_sock(sk); + + if (!ret) + sk->sk_destruct = sctp_v6_destruct_sock; + + return ret; } struct proto sctpv6_prot = { @@ -9676,8 +9689,8 @@ struct proto sctpv6_prot = { .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, - .init = sctp_init_sock, - .destroy = sctp_v6_destroy_sock, + .init = sctp_v6_init_sock, + .destroy = sctp_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index bb22b71df7a3..94727feb07b3 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -490,11 +490,8 @@ static int sctp_enqueue_event(struct sctp_ulpq *ulpq, if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe)) goto out_free; - if (skb_list) - skb_queue_splice_tail_init(skb_list, - &sk->sk_receive_queue); - else - __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_queue_splice_tail_init(skb_list, + &sk->sk_receive_queue); if (!sp->data_ready_signalled) { sp->data_ready_signalled = 1; @@ -504,10 +501,7 @@ static int sctp_enqueue_event(struct sctp_ulpq *ulpq, return 1; out_free: - if (skb_list) - sctp_queue_purge_ulpevents(skb_list); - else - sctp_ulpevent_free(event); + sctp_queue_purge_ulpevents(skb_list); return 0; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 0a8510a0c5e6..b05daafd369a 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -38,8 +38,7 @@ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq); /* 1st Level Abstractions */ /* Initialize a ULP queue from a block of memory. 
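The SCTP rework above splits teardown in two: a common helper frees protocol state, and thin v4/v6 destructors chain to the family destructor, inet_sock_destruct() or inet6_sock_destruct(). Installing the v6 destructor from the protocol's init hook is what lets sctp_copy_sock() inherit sk->sk_destruct instead of hard-coding the v4 variant. The shape of the split, as a sketch with placeholder names:

static void my_destruct_common(struct sock *sk)
{
	/* free protocol-private state hanging off 'sk' here */
}

static void my_v4_destruct(struct sock *sk)
{
	my_destruct_common(sk);
	inet_sock_destruct(sk);
}

static void my_v6_destruct(struct sock *sk)
{
	my_destruct_common(sk);
	inet6_sock_destruct(sk);
}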
*/ -struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, - struct sctp_association *asoc) +void sctp_ulpq_init(struct sctp_ulpq *ulpq, struct sctp_association *asoc) { memset(ulpq, 0, sizeof(struct sctp_ulpq)); @@ -48,8 +47,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, skb_queue_head_init(&ulpq->reasm_uo); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; - - return ulpq; } @@ -259,10 +256,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list) return 1; out_free: - if (skb_list) - sctp_queue_purge_ulpevents(skb_list); - else - sctp_ulpevent_free(event); + sctp_queue_purge_ulpevents(skb_list); return 0; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index e6ee797640b4..c305d8dd23f8 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -896,7 +896,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) } memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], SMC_MAX_PNETID_LEN); - if (smc_wr_alloc_lgr_mem(lgr)) + rc = smc_wr_alloc_lgr_mem(lgr); + if (rc) goto free_wq; smc_llc_lgr_init(lgr, smc); diff --git a/net/socket.c b/net/socket.c index 00da9ce3dba0..55c5d536e5f6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2199,13 +2199,7 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, static bool sock_use_custom_sol_socket(const struct socket *sock) { - const struct sock *sk = sock->sk; - - /* Use sock->ops->setsockopt() for MPTCP */ - return IS_ENABLED(CONFIG_MPTCP) && - sk->sk_protocol == IPPROTO_MPTCP && - sk->sk_type == SOCK_STREAM && - (sk->sk_family == AF_INET || sk->sk_family == AF_INET6); + return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); } /* diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 5f96e75f9eec..48337687848c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -130,8 +130,8 @@ gss_krb5_make_confounder(char *p, u32 conflen) /* initialize to random value */ if (i == 0) { - i = prandom_u32(); - i = (i << 32) | prandom_u32(); + i = get_random_u32(); + i = (i << 32) | get_random_u32(); } switch (conflen) { diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index c3c693b51c94..f075a9fb5ccc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -677,7 +677,7 @@ static void cache_limit_defers(void) /* Consider removing either the first or the last */ if (cache_defer_cnt > DFR_MAX) { - if (prandom_u32() & 1) + if (prandom_u32_max(2)) discard = list_entry(cache_defer_list.next, struct cache_deferred_req, recent); else diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 71dc26373444..656cec208371 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1865,7 +1865,7 @@ xprt_alloc_xid(struct rpc_xprt *xprt) static void xprt_init_xid(struct rpc_xprt *xprt) { - xprt->xid = prandom_u32(); + xprt->xid = get_random_u32(); } static void diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f34d5427b66c..915b9902f673 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1619,7 +1619,7 @@ static int xs_get_random_port(void) if (max < min) return -EADDRINUSE; range = max - min + 1; - rand = (unsigned short) prandom_u32() % range; + rand = prandom_u32_max(range); return rand + min; } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index da69e1abf68f..e8630707901e 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -148,8 +148,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, { struct net *net = d->net; struct tipc_net *tn = 
tipc_net(net); - bool trial = time_before(jiffies, tn->addr_trial_end); u32 self = tipc_own_addr(net); + bool trial = time_before(jiffies, tn->addr_trial_end) && !self; if (mtyp == DSC_TRIAL_FAIL_MSG) { if (!trial) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f1c3b8eb4b3d..e902b01ea3cb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3010,7 +3010,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk) struct net *net = sock_net(sk); struct tipc_net *tn = net_generic(net, tipc_net_id); u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; - u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; + u32 portid = prandom_u32_max(remaining) + TIPC_MIN_PORT; while (remaining--) { portid++; diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 5522865deae9..d92ec92f0b71 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -450,12 +450,19 @@ static void tipc_conn_data_ready(struct sock *sk) static void tipc_topsrv_accept(struct work_struct *work) { struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); - struct socket *lsock = srv->listener; - struct socket *newsock; + struct socket *newsock, *lsock; struct tipc_conn *con; struct sock *newsk; int ret; + spin_lock_bh(&srv->idr_lock); + if (!srv->listener) { + spin_unlock_bh(&srv->idr_lock); + return; + } + lsock = srv->listener; + spin_unlock_bh(&srv->idr_lock); + while (1) { ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) @@ -489,7 +496,7 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); srv = sk->sk_user_data; - if (srv->listener) + if (srv) queue_work(srv->rcv_wq, &srv->awork); read_unlock_bh(&sk->sk_callback_lock); } @@ -568,7 +575,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; sub.filter = filter; - *(u32 *)&sub.usr_handle = port; + *(u64 *)&sub.usr_handle = (u64)port; con = tipc_conn_alloc(tipc_topsrv(net)); if (IS_ERR(con)) @@ -699,8 +706,9 @@ static void tipc_topsrv_stop(struct net *net) __module_get(lsock->sk->sk_prot_creator->owner); srv->listener = NULL; spin_unlock_bh(&srv->idr_lock); - sock_release(lsock); + tipc_topsrv_work_stop(srv); + sock_release(lsock); idr_destroy(&srv->conn_idr); kfree(srv); } diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 9b79e334dbd9..955ac3e0bf4d 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -273,7 +273,7 @@ static int tls_strp_read_copyin(struct tls_strparser *strp) return desc.error; } -static int tls_strp_read_short(struct tls_strparser *strp) +static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) { struct skb_shared_info *shinfo; struct page *page; @@ -283,7 +283,7 @@ static int tls_strp_read_short(struct tls_strparser *strp) * to read the data out. Otherwise the connection will stall. * Without pressure threshold of INT_MAX will never be ready. 
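The one-line tipc change above widens the store into the 8-byte usr_handle field from u32 to u64 so its trailing four bytes are no longer left uninitialized (the handle is echoed back later, so stale kernel bytes could otherwise leak). The same idea as a standalone sketch, using memcpy instead of the type-punned store:

#include <stdint.h>
#include <string.h>

struct subscr {
	char usr_handle[8];	/* opaque handle echoed back to the peer */
};

/* Store a 32-bit port id while defining all 8 handle bytes. */
static void set_handle(struct subscr *s, uint32_t port)
{
	uint64_t v = port;

	memcpy(s->usr_handle, &v, sizeof(v));
}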
*/ - if (likely(!tcp_epollin_ready(strp->sk, INT_MAX))) + if (likely(qshort && !tcp_epollin_ready(strp->sk, INT_MAX))) return 0; shinfo = skb_shinfo(strp->anchor); @@ -315,6 +315,27 @@ return 0; } +static bool tls_strp_check_no_dup(struct tls_strparser *strp) +{ + unsigned int len = strp->stm.offset + strp->stm.full_len; + struct sk_buff *skb; + u32 seq; + + skb = skb_shinfo(strp->anchor)->frag_list; + seq = TCP_SKB_CB(skb)->seq; + + while (skb->len < len) { + seq += skb->len; + len -= skb->len; + skb = skb->next; + + if (TCP_SKB_CB(skb)->seq != seq) + return false; + } + + return true; +} + static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) { struct tcp_sock *tp = tcp_sk(strp->sk); @@ -373,7 +394,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp) return tls_strp_read_copyin(strp); if (inq < strp->stm.full_len) - return tls_strp_read_short(strp); + return tls_strp_read_copy(strp, true); if (!strp->stm.full_len) { tls_strp_load_anchor_with_queue(strp, inq); @@ -387,9 +408,12 @@ static int tls_strp_read_sock(struct tls_strparser *strp) strp->stm.full_len = sz; if (!strp->stm.full_len || inq < strp->stm.full_len) - return tls_strp_read_short(strp); + return tls_strp_read_copy(strp, true); } + if (!tls_strp_check_no_dup(strp)) + return tls_strp_read_copy(strp, false); + strp->msg_ready = 1; tls_rx_msg_ready(strp); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 15dbb392c875..b3545fc68097 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1147,7 +1147,7 @@ static int unix_autobind(struct sock *sk) addr->name->sun_family = AF_UNIX; refcount_set(&addr->refcnt, 1); - ordernum = prandom_u32(); + ordernum = get_random_u32(); lastnum = ordernum & 0xFFFFF; retry: ordernum = (ordernum + 1) & 0xFFFFF; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d45d5366115a..dc2763540393 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -204,6 +204,7 @@ void wait_for_unix_gc(void) /* The external entry point: unix_gc() */ void unix_gc(void) { + struct sk_buff *next_skb, *skb; struct unix_sock *u; struct unix_sock *next; struct sk_buff_head hitlist; @@ -297,11 +298,30 @@ void unix_gc(void) spin_unlock(&unix_gc_lock); + /* We need io_uring to clean its registered files, ignore all io_uring + * originated skbs. It's fine as io_uring doesn't keep references to + * other io_uring instances and so killing all other files in the cycle + * will put all io_uring references forcing it to go through normal + * release path eventually putting registered files. + */ + skb_queue_walk_safe(&hitlist, skb, next_skb) { + if (skb->scm_io_uring) { + __skb_unlink(skb, &hitlist); + skb_queue_tail(&skb->sk->sk_receive_queue, skb); + } + } + /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); spin_lock(&unix_gc_lock); + /* There could be io_uring registered files, just push them back to + * the inflight list + */ + list_for_each_entry_safe(u, next, &gc_candidates, link) + list_move_tail(&u->link, &gc_inflight_list); + /* All candidates should have been detached by now.
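tls_strp_check_no_dup() above walks the anchor's frag_list and verifies every queued TCP segment begins exactly at the sequence number where the previous one ended, i.e. the bytes of the pending record form one contiguous, non-overlapping run; otherwise the record is copied out instead of being parsed in place. The check reduced to plain arrays:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct seg {
	uint32_t seq;	/* TCP sequence number of first byte */
	uint32_t len;	/* bytes in this segment */
};

/* True if the first 'need' bytes starting at s[0] are contiguous
 * across the n queued segments (assumes n >= 1).
 */
static bool segs_contiguous(const struct seg *s, size_t n, uint32_t need)
{
	uint32_t seq = s[0].seq;
	size_t i = 0;

	while (s[i].len < need) {
		seq += s[i].len;
		need -= s[i].len;
		i++;
		if (i == n || s[i].seq != seq)
			return false;
	}
	return true;
}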
*/ BUG_ON(!list_empty(&gc_candidates)); diff --git a/net/wireless/core.h b/net/wireless/core.h index 775e16cb99ed..af85d8909935 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -271,6 +271,8 @@ struct cfg80211_event { } ij; struct { u8 bssid[ETH_ALEN]; + const u8 *td_bitmap; + u8 td_bitmap_len; } pa; }; }; @@ -409,7 +411,8 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); -void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid); +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid, + const u8 *td_bitmap, u8 td_bitmap_len); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_autodisconnect_wk(struct work_struct *work); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 581df7f4c524..58e1fb18f85a 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -42,6 +42,10 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, unsigned int link_id; for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) { + cr.links[link_id].status = data->links[link_id].status; + WARN_ON_ONCE(cr.links[link_id].status != WLAN_STATUS_SUCCESS && + (!cr.ap_mld_addr || !cr.links[link_id].bss)); + cr.links[link_id].bss = data->links[link_id].bss; if (!cr.links[link_id].bss) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 597c52236514..148f66edb015 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7780,6 +7780,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) int err; memset(&params, 0, sizeof(params)); + params.link_id = nl80211_link_id_or_invalid(info->attrs); /* default to not changing parameters */ params.use_cts_prot = -1; params.use_short_preamble = -1; @@ -16566,7 +16567,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_bss, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_GET_REG, @@ -17747,6 +17749,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, link_info_size += (cr->links[link].bssid || cr->links[link].bss) ?
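The nl80211_send_connect_result() sizing code around this point grows the per-link estimate by nla_total_size(sizeof(u16)) for the new status attribute; netlink messages are budgeted with nla_total_size() before nlmsg_new() so the later nla_put() calls cannot run out of tailroom. The size-then-fill pattern as a sketch (the attribute ids and message type here are hypothetical):

static struct sk_buff *build_msg(u32 portid, u32 seq, const u8 *mac, u16 status)
{
	size_t sz = nla_total_size(ETH_ALEN) +		/* MAC attribute */
		    nla_total_size(sizeof(u16));	/* status attribute */
	struct sk_buff *msg = nlmsg_new(sz, GFP_KERNEL);
	struct nlmsghdr *nlh;

	if (!msg)
		return NULL;

	nlh = nlmsg_put(msg, portid, seq, NLMSG_MIN_TYPE, 0, 0);
	if (!nlh)
		goto err;
	if (nla_put(msg, 1, ETH_ALEN, mac) ||	/* attr ids 1 and 2 are */
	    nla_put_u16(msg, 2, status))	/* purely illustrative  */
		goto err;
	nlmsg_end(msg, nlh);
	return msg;

err:
	nlmsg_free(msg);
	return NULL;
}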
nla_total_size(ETH_ALEN) : 0; + link_info_size += nla_total_size(sizeof(u16)); } } @@ -17815,7 +17818,9 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, bssid)) || (cr->links[link].addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, - cr->links[link].addr))) + cr->links[link].addr)) || + nla_put_u16(msg, NL80211_ATTR_STATUS_CODE, + cr->links[link].status)) goto nla_put_failure; nla_nest_end(msg, nested_mlo_links); @@ -17939,7 +17944,8 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, } void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid) + struct net_device *netdev, const u8 *bssid, + const u8 *td_bitmap, u8 td_bitmap_len) { struct sk_buff *msg; void *hdr; @@ -17959,6 +17965,11 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) goto nla_put_failure; + if ((td_bitmap_len > 0) && td_bitmap) + if (nla_put(msg, NL80211_ATTR_TD_BITMAP, + td_bitmap_len, td_bitmap)) + goto nla_put_failure; + genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 855d540ddfb9..ba9457e94c43 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -83,7 +83,8 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_roam_info *info, gfp_t gfp); void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid); + struct net_device *netdev, const u8 *bssid, + const u8 *td_bitmap, u8 td_bitmap_len); void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d513536617bd..4b5b6ee0fe01 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -793,6 +793,10 @@ void __cfg80211_connect_result(struct net_device *dev, } for_each_valid_link(cr, link) { + /* don't do extra lookups for failures */ + if (cr->links[link].status != WLAN_STATUS_SUCCESS) + continue; + if (cr->links[link].bss) continue; @@ -829,6 +833,16 @@ void __cfg80211_connect_result(struct net_device *dev, } memset(wdev->links, 0, sizeof(wdev->links)); + for_each_valid_link(cr, link) { + if (cr->links[link].status == WLAN_STATUS_SUCCESS) + continue; + cr->valid_links &= ~BIT(link); + /* don't require bss pointer for failed links */ + if (!cr->links[link].bss) + continue; + cfg80211_unhold_bss(bss_from_pub(cr->links[link].bss)); + cfg80211_put_bss(wdev->wiphy, cr->links[link].bss); + } wdev->valid_links = cr->valid_links; for_each_valid_link(cr, link) wdev->links[link].client.current_bss = @@ -1237,7 +1251,8 @@ out: } EXPORT_SYMBOL(cfg80211_roamed); -void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid, + const u8 *td_bitmap, u8 td_bitmap_len) { ASSERT_WDEV_LOCK(wdev); @@ -1250,11 +1265,11 @@ void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) return; nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, - bssid); + bssid, td_bitmap, td_bitmap_len); } void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, - gfp_t gfp) + const u8 *td_bitmap, u8 td_bitmap_len, gfp_t gfp) { struct wireless_dev *wdev = 
dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -1264,12 +1279,15 @@ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, if (WARN_ON(!bssid)) return; - ev = kzalloc(sizeof(*ev), gfp); + ev = kzalloc(sizeof(*ev) + td_bitmap_len, gfp); if (!ev) return; ev->type = EVENT_PORT_AUTHORIZED; memcpy(ev->pa.bssid, bssid, ETH_ALEN); + ev->pa.td_bitmap = ((u8 *)ev) + sizeof(*ev); + ev->pa.td_bitmap_len = td_bitmap_len; + memcpy((void *)ev->pa.td_bitmap, td_bitmap, td_bitmap_len); /* * Use the wdev event list so that if there are pending diff --git a/net/wireless/util.c b/net/wireless/util.c index 1f285b515028..eb277105a878 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -990,7 +990,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); break; case EVENT_PORT_AUTHORIZED: - __cfg80211_port_authorized(wdev, ev->pa.bssid); + __cfg80211_port_authorized(wdev, ev->pa.bssid, + ev->pa.td_bitmap, + ev->pa.td_bitmap_len); break; } wdev_unlock(wdev); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 81df34b3da6e..3d2fe7712ac5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2072,7 +2072,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) } else { u32 spi = 0; for (h = 0; h < high-low+1; h++) { - spi = low + prandom_u32()%(high-low+1); + spi = low + prandom_u32_max(high - low + 1); x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { newspi = htonl(spi);
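cfg80211_port_authorized() above makes a single allocation covering the event struct plus the variable-length transition-disable bitmap, then points ev->pa.td_bitmap just past the struct, so the event and its payload share one lifetime. The allocation pattern in isolation, as a userspace sketch:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct pa_event {
	uint8_t bssid[6];
	const uint8_t *td_bitmap;	/* points into the same allocation */
	uint8_t td_bitmap_len;
};

/* One allocation for the struct and its trailing bitmap. */
static struct pa_event *pa_event_alloc(const uint8_t *bitmap, uint8_t len)
{
	struct pa_event *ev = calloc(1, sizeof(*ev) + len);

	if (!ev)
		return NULL;
	ev->td_bitmap = (uint8_t *)ev + sizeof(*ev);
	ev->td_bitmap_len = len;
	memcpy((uint8_t *)ev + sizeof(*ev), bitmap, len);
	return ev;
}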